Evidence exists from cancer tissue studies showing cellular interactions between tumor and peritumoral fat tissues. These include tumor tissues using fat reserves in adipocytes as sources of energy in areas of increased hypoxia and oxidative stress and immunosuppression in the tumor microenvironment due to genetic deregulations leading to shifts in necessary intercellular functions such as chemokine production. While new treatments such as immune checkpoint inhibitors have made great strides in reducing the burden of many cancers, the interplay between stromal tissues and tumors must not go unnoted as there have been repeated correlations between tumor microenvironment factors and immune checkpoint treatment efficacy.
This study aims to elucidate the cellular and genetic interactions between tumor and peritumoral fat tissue in a murine B16 melanoma model when treated with anti-PD-1 immune checkpoint inhibitor treatment. The below data analysis examines the scRNA-seq data of ~21K filtered genes in ~88K cells taken from either isotype- or anti-PD-1 treated B6 mice. scRNA-seq data was acquired using CellRanger. This single-cell data was then analyzed using the Scanpy toolkit (Theis et al., 2018, ver. 1.6.0, doi: 10.1186/s13059-017-1382-0) for subsequent cell group clustering and mapping to be used for examining cellular and genetic interactions with and without anti-PD-1 treatment.
Primary Analyst: Jacob Harris (jacob.harris@novartis.com)
Co-Analyst, In vivo Data Acquisition: Bob Haines (bob.haines@novartis.com)
# Install required package versions within the Jupyter notebook kernel:
# !{sys.executable} -m pip install <package_name>==version_num --user
### Note: It is highly recommended that the user create a new venv for the needed packages:
### python3 -m venv <DIR>
### source <DIR>/bin/activate
# python==3.8.5
# anndata==0.7.4
# scanpy==1.6.0
# sinfo==0.3.1
# IPython==7.19.0
# jupyter_client==6.1.7
# jupyter_core==4.7.0
# jupyterlab==2.2.9
# notebook==6.1.6
# louvain==0.7.0
# leidenalg==0.8.3
# Packages
import sys
import os
import anndata as ann
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib
from matplotlib import rcParams
from matplotlib import colors
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import leidenalg
import re
import scipy
import math
import statistics
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.logging.print_header()
sc.settings.set_figure_params(dpi = 80)
pd.show_versions(as_json = False)
WARNING: If you miss a compact list, please try `print_header`! ----- anndata 0.7.4 scanpy 1.6.0 sinfo 0.3.1 ----- PIL 8.1.0 anndata 0.7.4 appnope 0.1.2 backcall 0.2.0 cairo 1.20.0 cffi 1.14.4 cloudpickle 1.6.0 colorama 0.4.4 constants NA cycler 0.10.0 cython_runtime NA cytoolz 0.11.0 dask 2021.01.0 dateutil 2.8.1 decorator 4.4.2 get_version 2.1 h5py 2.10.0 highs_wrapper NA igraph 0.8.3 ipykernel 5.3.4 ipython_genutils 0.2.0 ipywidgets 7.6.3 jedi 0.17.0 joblib 1.0.0 kiwisolver 1.3.0 legacy_api_wrap 0.0.0 leidenalg 0.8.3 llvmlite 0.34.0 louvain 0.7.0 matplotlib 3.3.2 mpl_toolkits NA natsort 7.0.1 numba 0.51.2 numexpr 2.7.2 numpy 1.19.2 packaging 20.8 pandas 1.2.0 parso 0.8.1 pexpect 4.8.0 pickleshare 0.7.5 pkg_resources NA prompt_toolkit 3.0.8 ptyprocess 0.7.0 pygments 2.7.4 pyparsing 2.4.7 pytz 2020.5 scanpy 1.6.0 scipy 1.6.0 seaborn 0.11.0 setuptools_scm NA sinfo 0.3.1 six 1.15.0 sklearn 0.23.2 sphinxcontrib NA statsmodels 0.11.1 storemagic NA tables 3.6.1 texttable 1.6.3 tlz 0.11.0 toolz 0.11.1 tornado 6.1 traitlets 5.0.5 wcwidth 0.2.5 yaml 5.3.1 zmq 20.0.0 ----- IPython 7.19.0 jupyter_client 6.1.7 jupyter_core 4.7.0 jupyterlab 2.2.9 notebook 6.1.6 ----- Python 3.8.5 (default, Sep 4 2020, 02:22:02) [Clang 10.0.0 ] macOS-10.15.7-x86_64-i386-64bit 8 logical CPU cores, i386 ----- Session information updated at 2021-04-21 16:14 scanpy==1.6.0 anndata==0.7.4 umap==0.4.6 numpy==1.19.2 scipy==1.6.0 pandas==1.2.0 scikit-learn==0.23.2 statsmodels==0.11.1 python-igraph==0.8.3 louvain==0.7.0 leidenalg==0.8.3 INSTALLED VERSIONS ------------------ commit : 3e89b4c4b1580aa890023fc550774e63d499da25 python : 3.8.5.final.0 python-bits : 64 OS : Darwin OS-release : 19.6.0 Version : Darwin Kernel Version 19.6.0: Tue Jan 12 22:13:05 PST 2021; root:xnu-6153.141.16~1/RELEASE_X86_64 machine : x86_64 processor : i386 byteorder : little LC_ALL : None LANG : en_US.UTF-8 LOCALE : en_US.UTF-8 pandas : 1.2.0 numpy : 1.19.2 pytz : 2020.5 dateutil : 2.8.1 pip : 20.3.3 setuptools : 51.1.2.post20210112 Cython : 0.29.21 pytest : None hypothesis : None sphinx : 3.4.3 blosc : None feather : None xlsxwriter : None lxml.etree : None html5lib : None pymysql : None psycopg2 : None jinja2 : 2.11.2 IPython : 7.19.0 pandas_datareader: None bs4 : None bottleneck : None fsspec : None fastparquet : None gcsfs : None matplotlib : 3.3.2 numexpr : 2.7.2 odfpy : None openpyxl : None pandas_gbq : None pyarrow : None pyxlsb : None s3fs : None scipy : 1.6.0 sqlalchemy : None tables : 3.6.1 tabulate : None xarray : None xlrd : None xlwt : None numba : 0.51.2
# Check current working directory
print('Current working directory:', os.getcwd())
# If desired, uncomment and edit necessary directory constants below to change wd
# NEW_WD = os.path.join()
# os.makedirs(NEW_WD)
# os.chdir(NEW_WD)
# print('New working directory:', os.getcwd())
Current working directory: /Users/harrij1d/Desktop/Bob_expB_v1
Preprocessed data of scRNA-seq can be found in L-Drive folder L://usca-dfs/LABDATA/LABS/IO/USCA-VIRTUAL-I22153/CIPOLDA3/2021/JacobH/Bob_postdoc/ExpB_v1.0_final
# Load in raw 10X genomic data
sc.__dict__.keys()
genome_string = "mm10"
filename = "raw_gene_bc_matrices_norm_better.h5"
adata = sc.read_10x_h5(filename, genome = genome_string)
reading raw_gene_bc_matrices_norm_better.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:41)
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if not is_categorical(df_full[k]): Variable names are not unique. To make them unique, call `.var_names_make_unique`.
# Load highly_variable_genes and raw gene expression data as anndata set
adata = ann.read_h5ad('experimentB_combined_norm_better_step5_final_modified_for_interaction_analysis.h5ad')
adata
AnnData object with n_obs × n_vars = 88173 × 20737
obs: 'Barcode', 'library_id', 'molecule_h5', 'lab_sample_id', 'sample_name', 'Model', 'Type', 'Location', 'Treatment', 'Replicate', 'cell_fraction', 'Timepoint', 'n_genes', 'cell_type', 'percent_mito', 'percent_ribo', 'n_counts', 'leiden', 'Epcam', 'Pax7', 'lin_mark_present', 'doublet_scores', 'predicted_doublets', 'batch'
var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'cell_type_colors', 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap'
obsm: 'X_pca', 'X_umap'
Throughout our analysis, we relied on clustering algorithms in order to group similar cells for the purposes of visualizations and cell type identification. Scanpy uses an algorithm known as the Leiden algorithm, one that is more recent than the more well-known Louvain algorithm which has shown improved community recognition and speed (Traag et al., 2019, doi: 10.1038/s41598-019-41695-z).
# Generate UMAP of numbered Leiden clusters (automatically saved to figures folder)
sc.pl.umap(adata, color=['leiden'], legend_loc='on data', legend_fontsize=8, legend_fontoutline=1,
title='Leiden Clustering', save='_leiden_clustering.pdf')
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
WARNING: saving figure to file figures/umap_leiden_clustering.pdf
Normal anndata is stored as a numpy array matrix (adata.X) with rows corresponding to single cells (adata.obs) and columns to gene expression levels (adata.var) which can be frozen as an adata.raw array with normalized values to be used for differential testing and visualizations. Normally, this is done when tagging and filtering out any genes which are deemed "not highly variable" in order to save on computation time for analysis and focus in on a select group set of genes. However, we wanted to use the raw, unnormalized adata as our dataset to hopefully gain insight into all of the genes present in our cells.
However, as the normal Scanpy workflow usually involves working with a filtered and normalized adata set, we had to first convert our adata.raw set to a Scipy sparse matrix (adata.raw.X) and then convert that to a numpy array, adata.X.toarray(), which could be saved to the adata.X to convert the raw adata to a form which could be used for computations going forward.
# Convert adata.raw -> scipy -> numpy -> adata
# adata = adata.raw.to_adata() -> adata is already stored as adata.raw
adata.X = adata.X.toarray() # converts adata.raw.X (scipy matrix) to adata.raw.X.toarray() (numpy array, useable)
adata.X # normal adata array
array([[0. , 0. , 0. , ..., 1.5098591, 1.5098591,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
...,
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ]], dtype=float32)
# Set AnnData to the normalized and logarithized gene expression for use in differentials
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
normalizing counts per cell
finished (0:00:01)
# Regress out for n_counts and % of mitochondrial genes expressed
sc.pp.regress_out(adata, ['n_counts', 'percent_mito'])
sc.pp.scale(adata, max_value = 10) # scale data to unit variance
regressing out ['n_counts', 'percent_mito']
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
finished (0:36:16)
# Save progress of work on adata for quick reloading
os.mkdir('adata_steps') # make folder for later adata checkpoints
adata_step5 = os.path.join('adata_steps', 'expB_v1_step5_preprocessing.h5ad') # allow for file dir path across Windows/Mac/Unix OS
adata.write(adata_step5)
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
To get an idea of what types of cells were in each of the identified Leiden clusters, we looked at genetic expression levels of characteristic genes that would help distinguish between Cd45- stromal cells (tumor, endothelial, and fibroblasts) and Cd45+ immune cells. We did much of this using visualization tools, namely matrixplots, to name Leiden cluster numbers based on predicted cell types and optional biomarker genetic tags and/or tissue location (F for fat or T for tumor).
Note: All figures are automatically saved to the 'figures' folder.
# Leiden cell cluster dendrogram
sc.tl.dendrogram(adata, groupby = 'leiden')
using 'X_pca' with n_pcs = 50 Storing dendrogram info using `.uns["dendrogram_['leiden']"]`
# Tumor (Rhox5+), endothelial (Pecam1+), fibroblast (Thy1+), and immune cell (Ptprc, a.k.a. Cd45+) cluster matrixplots
sc.pl.umap(adata, color=['Rhox5', 'Pecam1', 'Thy1', 'Ptprc'], save='_tumor_endo_fibro_immune.pdf')
WARNING: saving figure to file figures/umap_tumor_endo_fibro_immune.pdf
# Matrixplot of Cd45+ cell genetic markers
markers = ["Ptprc", "Itgam", "Adgre1", "H2-Aa", "Ly6c2", "Itgax", "Itgae", "Cd3e",
"Ncr1", "Cd19", "Sdc1", "Siglecf", "Siglech", "Cma1", "Ly6g", "Cxcl2"]
sc.pl.matrixplot(adata, markers, groupby = 'leiden', dendrogram = True, swap_axes = True, standard_scale='var', save='cd45pos_markers.pdf')
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key]) /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if not is_categorical(df_full[k]): /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/pandas/core/arrays/categorical.py:2487: FutureWarning: The `inplace` parameter in pandas.Categorical.remove_unused_categories is deprecated and will be removed in a future version. res = method(*args, **kwargs)
WARNING: saving figure to file figures/matrixplot_cd45pos_markers.pdf
# Matrixplot of Cd45- cell genetic markers
markers = ["Pecam1", "Lyve1", "Flt4", "Acta2", "Pparg", "Thy1", "Pdpn", "Fap", "Ly6a", "Ly6e", "Pdgfra", "Cd34"]
sc.pl.matrixplot(adata, markers, groupby = 'leiden', dendrogram = True, swap_axes = True, standard_scale='var', save='cd45neg_markers.pdf')
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key]) /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if not is_categorical(df_full[k]): /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/pandas/core/arrays/categorical.py:2487: FutureWarning: The `inplace` parameter in pandas.Categorical.remove_unused_categories is deprecated and will be removed in a future version. res = method(*args, **kwargs)
WARNING: saving figure to file figures/matrixplot_cd45neg_markers.pdf
# Matrixplot of all cell markers
markers = ["Ptprc", "Pecam1", "Pparg", "Igfbp3", "Thy1", "Cd55", "Gpx3", "Il33", "Postn", "Cd19",
"Cd3g", "Ncr1", "Itgam", "H2-Aa", "Ly6c2", "Cebpb", "Retnla", "Ear2", "Arg1", "Mrc1",
"Adgre1", "Itgax", "Itgae", "Ccr7", "Mki67", "Cxcl2", "Siglech", "Cma1", "Ccl4"]
sc.pl.matrixplot(adata, markers, groupby = 'leiden', dendrogram = True, swap_axes = True, standard_scale='var', save='all_cell_markers.pdf')
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key]) /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if not is_categorical(df_full[k]): /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/pandas/core/arrays/categorical.py:2487: FutureWarning: The `inplace` parameter in pandas.Categorical.remove_unused_categories is deprecated and will be removed in a future version. res = method(*args, **kwargs)
WARNING: saving figure to file figures/matrixplot_all_cell_markers.pdf
We then assembled a list of all of the predicted cell types and assigned each name to each corresponding Leiden cluster number in an adata.obs column 'cluster_name' for each cell. Future users are encouraged to create their own cluster naming conventions based on the nature of their experiment.
# Define cluster_names and leiden_clusters lists
cluster_names = ['tumor0', 'endo1_T', 'tumor2', 'mac3_Arg1_T', 'endo4_F', 'mac5_F', 'mono6_T', 'mac_mono7_T', 'DC8', 'fib9_IL33', 'myel10_Retnla_FatC',
'mac11_F', 'NK12', 'mac_mono13_mix', 'fib14_Postn', 'T_cell15', 'endo16_T', 'tumor17', 'fib_myel18', 'peri19_T', 'neutro20',
'fib21_nonU', 'tumor22', 'fib23_T', 'B_cell24', 'tumor25', 'myel26_poorQ', 'fib27_Gpx3', 'migDC28', 'peri_endo29_F', 'prol_myel30',
'mast_baso31', 'tumor32', 'pDC33', 'endo34_mix', 'eos35']
leiden_clusters = [int(i) for i in adata.obs['leiden'].unique()]
leiden_clusters.sort()
# Assign cell types to each Leiden cluster
cell_types = []
for cell in range(len(adata.obs)):
cluster_num = str(adata.obs['leiden'][cell])
if cluster_num == '0':
cell_types.append('tumor0')
elif cluster_num == '1':
cell_types.append('endo1_T')
elif cluster_num == '2':
cell_types.append('tumor2')
elif cluster_num == '3':
cell_types.append('mac3_Arg1_T')
elif cluster_num == '4':
cell_types.append('endo4_F')
elif cluster_num == '5':
cell_types.append('mac5_F')
elif cluster_num == '6':
cell_types.append('mono6_T')
elif cluster_num == '7':
cell_types.append('mac_mono7_T')
elif cluster_num == '8':
cell_types.append('DC8')
elif cluster_num == '9':
cell_types.append('fib9_IL33')
elif cluster_num == '10':
cell_types.append('myel10_Retnla_FatC')
elif cluster_num == '11':
cell_types.append('mac11_F')
elif cluster_num == '12':
cell_types.append('NK12')
elif cluster_num == '13':
cell_types.append('mac_mono13_mix')
elif cluster_num == '14':
cell_types.append('fib14_Postn')
elif cluster_num == '15':
cell_types.append('T_cell15')
elif cluster_num == '16':
cell_types.append('endo16_T')
elif cluster_num == '17':
cell_types.append('tumor17')
elif cluster_num == '18':
cell_types.append('fib_myel18')
elif cluster_num == '19':
cell_types.append('peri19_T')
elif cluster_num == '20':
cell_types.append('neutro20')
elif cluster_num == '21':
cell_types.append('fib21_nonU')
elif cluster_num == '22':
cell_types.append('tumor22')
elif cluster_num == '23':
cell_types.append('fib23_T')
elif cluster_num == '24':
cell_types.append('B_cell24')
elif cluster_num == '25':
cell_types.append('tumor25')
elif cluster_num == '26':
cell_types.append('myel26_poorQ')
elif cluster_num == '27':
cell_types.append('fib27_Gpx3')
elif cluster_num == '28':
cell_types.append('migDC28')
elif cluster_num == '29':
cell_types.append('peri_endo29_F')
elif cluster_num == '30':
cell_types.append('prol_myel30')
elif cluster_num == '31':
cell_types.append('mast_baso31')
elif cluster_num == '32':
cell_types.append('tumor32')
elif cluster_num == '33':
cell_types.append('pDC33')
elif cluster_num == '34':
cell_types.append('endo34_mix')
elif cluster_num == '35':
cell_types.append('eos35')
adata.obs['cluster_name'] = cell_types
# UMAP of named cell clusters
sc.pl.umap(adata, color=['cluster_name'], legend_loc='on data', legend_fontsize=8, legend_fontoutline=1,
title='Cluster Cell Types', save='_cluster_cell_types.pdf')
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key]) ... storing 'cluster_name' as categorical
WARNING: saving figure to file figures/umap_cluster_cell_types.pdf
num_cells = [140.707, 104.043, 95.593, 92.605, 88.298, 88.220, 88.173]
cells_filtered = []
for i in range(len(num_cells)-1):
cells_filtered.append(num_cells[i] - num_cells[i+1])
# after Initial Min. Cell/Gene Filter = 140707 reads
labels = ['Initial Filter','Undesireable Samples', 'Cells w/ <3500 Genes', '1st Clustering Round',
'Lineage Markers', 'Doublets', 'Final Clustering']
y_labels = range(0, 161, 20)
fig, ax = plt.subplots(figsize=(50, 30))
ax.bar(labels, num_cells, align='center')
ax.set_xticklabels(labels, rotation=20, ha='right', fontsize=40)
#ax.set_xlabel('Filtering Step', fontsize=40)
ax.set_yticklabels(y_labels, fontsize=40)
ax.set_ylabel('Num. of Reads x 1000', fontsize=40)
ax.grid(False)
for i, v in enumerate(num_cells): # text label above bar
ax.text(i-0.13, v+2, str(int(v*1000)), fontsize=40)
ax.set_title('Number of Reads at Each Filtering Step', fontsize=55, fontweight='bold')
plt.savefig(os.path.join('figures', 'barplot_filtering_steps.pdf'))
plt.show()
<ipython-input-18-55a1b1db0f60>:12: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(labels, rotation=20, ha='right', fontsize=40) <ipython-input-18-55a1b1db0f60>:14: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(y_labels, fontsize=40)
# Save progress
adata_step6 = os.path.join('adata_steps', 'expB_v1_step6_cluster_names.h5ad')
adata.write(adata_step6)
adata
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
AnnData object with n_obs × n_vars = 88173 × 20737
obs: 'Barcode', 'library_id', 'molecule_h5', 'lab_sample_id', 'sample_name', 'Model', 'Type', 'Location', 'Treatment', 'Replicate', 'cell_fraction', 'Timepoint', 'n_genes', 'cell_type', 'percent_mito', 'percent_ribo', 'n_counts', 'leiden', 'Epcam', 'Pax7', 'lin_mark_present', 'doublet_scores', 'predicted_doublets', 'batch', 'cluster_name'
var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'cell_type_colors', 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap', 'log1p', "dendrogram_['leiden']", 'cluster_name_colors'
obsm: 'X_pca', 'X_umap'
At this point we separated our full adata set into subsets made up of those cells found in specified locations so that we could compare their genetic and cellular characteristics going forward. These include:
# UMAP of cell locations - FatT, FatC, Tumor
sc.pl.umap(adata, color=['cell_type'], title='Cell Locations', save='_cell_type_locations.pdf')
WARNING: saving figure to file figures/umap_cell_type_locations.pdf
# UMAP of iso/PD1 treatments
adata.uns['Treatment_colors'] = ["#000000", "#FF0000"]
sc.pl.umap(adata, color = ['Treatment'], title='Treatments', save = "_cluster_treatment_both.pdf")
PD1 = adata[adata.obs['Treatment'].isin(["PD1"])]
isotype = adata[adata.obs['Treatment'].isin(["Iso"])]
sc.pl.umap(PD1, color = ['Treatment'], title='aPD1 Treatment', save = "_cluster_treatment_PD1.pdf")
sc.pl.umap(isotype, color = ['Treatment'], title='Isotype Treatment', save = "_cluster_treatment_iso.pdf")
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
WARNING: saving figure to file figures/umap_cluster_treatment_both.pdf
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if not is_categorical(df_full[k]): /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/pandas/core/arrays/categorical.py:2487: FutureWarning: The `inplace` parameter in pandas.Categorical.remove_unused_categories is deprecated and will be removed in a future version. res = method(*args, **kwargs) /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
WARNING: saving figure to file figures/umap_cluster_treatment_PD1.pdf
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
WARNING: saving figure to file figures/umap_cluster_treatment_iso.pdf
# Specify adata sets for FatT, FatC, OnlyT, All tumor (AllT, FatT + onlyT)
# NOTE: "allT" (FatT + OnlyT datapoints) datasets are included as comments if
# researchers want for examination but are not required for full data analysis
# Only FatT tissue
fat_adata = adata[adata.obs['Type'].isin(['Fat'])]
fatT_adata = fat_adata[fat_adata.obs['Location'].isin(['T'])]
fatT_path = os.path.join('adata_steps', 'expB_v1_step7_fatT.h5ad')
fatT_adata.write(fatT_path)
adataFatT = ann.read_h5ad(fatT_path)
print(adataFatT)
print(' ')
# Only FatC tissue
fatC_adata = fat_adata[fat_adata.obs['Location'].isin(['C'])]
fatC_path = os.path.join('adata_steps', 'expB_v1_step7_fatC.h5ad')
fatC_adata.write(fatC_path)
adataFatC = ann.read_h5ad(fatC_path)
print(adataFatC)
print(' ')
# Only tumor tissue
tumor_adata = adata[adata.obs['Type'].isin(['Tumor'])]
onlyT_path = os.path.join('adata_steps', 'expB_v1_step7_onlyT.h5ad')
tumor_adata.write(onlyT_path)
adataOnlyT = ann.read_h5ad(onlyT_path)
print(adataOnlyT)
print(' ')
# All tissue around tumor (FatT + OnlyT)
all_tumor = adata[adata.obs['Location'].isin(['T'])]
allTumor_path = os.path.join('adata_steps', 'expB_v1_step7_allT.h5ad')
all_tumor.write(allTumor_path)
adataAllT = ann.read_h5ad(allTumor_path)
print(adataAllT)
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if not is_categorical(df_full[k]): /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/pandas/core/arrays/categorical.py:2487: FutureWarning: The `inplace` parameter in pandas.Categorical.remove_unused_categories is deprecated and will be removed in a future version. res = method(*args, **kwargs) /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
AnnData object with n_obs × n_vars = 13579 × 20737
obs: 'Barcode', 'library_id', 'molecule_h5', 'lab_sample_id', 'sample_name', 'Model', 'Type', 'Location', 'Treatment', 'Replicate', 'cell_fraction', 'Timepoint', 'n_genes', 'cell_type', 'percent_mito', 'percent_ribo', 'n_counts', 'leiden', 'Epcam', 'Pax7', 'lin_mark_present', 'doublet_scores', 'predicted_doublets', 'batch', 'cluster_name'
var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'Treatment_colors', 'cell_type_colors', 'cluster_name_colors', "dendrogram_['leiden']", 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap'
obsm: 'X_pca', 'X_umap'
AnnData object with n_obs × n_vars = 22641 × 20737
obs: 'Barcode', 'library_id', 'molecule_h5', 'lab_sample_id', 'sample_name', 'Model', 'Type', 'Location', 'Treatment', 'Replicate', 'cell_fraction', 'Timepoint', 'n_genes', 'cell_type', 'percent_mito', 'percent_ribo', 'n_counts', 'leiden', 'Epcam', 'Pax7', 'lin_mark_present', 'doublet_scores', 'predicted_doublets', 'batch', 'cluster_name'
var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'Treatment_colors', 'cell_type_colors', 'cluster_name_colors', "dendrogram_['leiden']", 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap'
obsm: 'X_pca', 'X_umap'
AnnData object with n_obs × n_vars = 51953 × 20737
obs: 'Barcode', 'library_id', 'molecule_h5', 'lab_sample_id', 'sample_name', 'Model', 'Type', 'Location', 'Treatment', 'Replicate', 'cell_fraction', 'Timepoint', 'n_genes', 'cell_type', 'percent_mito', 'percent_ribo', 'n_counts', 'leiden', 'Epcam', 'Pax7', 'lin_mark_present', 'doublet_scores', 'predicted_doublets', 'batch', 'cluster_name'
var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'Treatment_colors', 'cell_type_colors', 'cluster_name_colors', "dendrogram_['leiden']", 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap'
obsm: 'X_pca', 'X_umap'
AnnData object with n_obs × n_vars = 65532 × 20737
obs: 'Barcode', 'library_id', 'molecule_h5', 'lab_sample_id', 'sample_name', 'Model', 'Type', 'Location', 'Treatment', 'Replicate', 'cell_fraction', 'Timepoint', 'n_genes', 'cell_type', 'percent_mito', 'percent_ribo', 'n_counts', 'leiden', 'Epcam', 'Pax7', 'lin_mark_present', 'doublet_scores', 'predicted_doublets', 'batch', 'cluster_name'
var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'Treatment_colors', 'cell_type_colors', 'cluster_name_colors', "dendrogram_['leiden']", 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap'
obsm: 'X_pca', 'X_umap'
# Stacked bar plot for percentages of location of clusters in each cell location
labels = [cluster for cluster in cluster_names]
percentages = range(0, 101, 20)
width = 0.5
fatT_perc = [(adataFatT[adataFatT.obs['leiden'].isin([str(i)])].n_obs) /
(adata[adata.obs['leiden'].isin([str(i)])].n_obs) * 100 for i in range(len(cluster_names))]
fatC_perc = [(adataFatC[adataFatC.obs['leiden'].isin([str(i)])].n_obs) /
(adata[adata.obs['leiden'].isin([str(i)])].n_obs) * 100 for i in range(len(cluster_names))]
onlyT_perc = [(adataOnlyT[adataOnlyT.obs['leiden'].isin([str(i)])].n_obs) /
(adata[adata.obs['leiden'].isin([str(i)])].n_obs) * 100 for i in range(len(cluster_names))]
total_fat_perc = [(fatT_perc[i] + fatC_perc[i]) for i in range(len(labels))]
fig, ax = plt.subplots(figsize=(70, 40))
ax.bar(labels, fatT_perc, width, align='center', label='FatT', color='tab:orange', zorder=3)
ax.bar(labels, fatC_perc, width, align='center', bottom=fatT_perc, label='FatC', color='tab:blue', zorder=3)
ax.bar(labels, onlyT_perc, width, align='center', bottom=total_fat_perc, label='Tumor', color='tab:green', zorder=3)
ax.grid(zorder=0) # put grid marks behind bars
ax.set_xticklabels(labels, rotation=45, ha='right', fontsize=50)
ax.set_xlabel('Cluster Name', fontweight='bold', fontsize=50)
ax.set_yticklabels(percentages, fontsize=50)
ax.set_ylabel('Percentage', fontweight='bold', fontsize=50)
ax.set_title('Percentage of Clusters by Cell Locations', fontweight='bold', fontsize=60)
ax.legend(fontsize=35)
plt.plot()
plt.savefig(os.path.join('figures', 'barplot_clusters_by_cell_type.pdf'))
<ipython-input-23-f91da902710c>:19: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(labels, rotation=45, ha='right', fontsize=50) <ipython-input-23-f91da902710c>:21: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(percentages, fontsize=50)
# Stacked bar plot for percentages of treatment in each cluster
labels = [cluster for cluster in cluster_names]
width = 0.5
iso_perc = [(isotype[isotype.obs['leiden'].isin([str(i)])].n_obs) /
(adata[adata.obs['leiden'].isin([str(i)])].n_obs) * 100 for i in range(len(cluster_names))]
PD1_perc = [(PD1[PD1.obs['leiden'].isin([str(i)])].n_obs) /
(adata[adata.obs['leiden'].isin([str(i)])].n_obs) * 100 for i in range(len(cluster_names))]
fig, ax = plt.subplots(figsize=(70, 40))
ax.bar(labels, iso_perc, width, align='center', label='Isotype', color='k', zorder=3)
ax.bar(labels, PD1_perc, width, align='center', bottom=iso_perc, label='aPD1', color='r', zorder=3)
ax.grid(zorder=0) # put grid marks behind bars
ax.set_xticklabels(labels, rotation=45, ha='right', fontsize=50)
ax.set_xlabel('Cluster Name', fontweight='bold', fontsize=50)
ax.set_yticklabels(percentages, fontsize=50)
ax.set_ylabel('Percentage', fontweight='bold', fontsize=50)
ax.set_title('Percentage of Clusters by Treatment', fontweight='bold', fontsize=60)
ax.legend(fontsize=35)
plt.plot()
plt.savefig(os.path.join('figures', 'barplot_clusters_by_treatment.pdf'))
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if not is_categorical(df_full[k]): /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/pandas/core/arrays/categorical.py:2487: FutureWarning: The `inplace` parameter in pandas.Categorical.remove_unused_categories is deprecated and will be removed in a future version. res = method(*args, **kwargs) <ipython-input-24-b35da29480d8>:13: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(labels, rotation=45, ha='right', fontsize=50) <ipython-input-24-b35da29480d8>:15: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(percentages, fontsize=50)
The following algorithm allowed us to compare the full scope of genetic and cellular interactions between every cluster based on isotype versus anti-PD-1 treatments and the separated tissue adata subsets. A list of ~2300 curated ligand-receptor gene pairs by Hou et al. (2020, doi: 10.1038/s41467-020-18873-z) serves as a reference for gene pairs within all adata subsets. The genetic and cellular characteristics of each cluster is specified, including the number of genes and cells per cluster along with the means of genetic expression for each gene. The mean genetic expression levels of corresponding ligands and receptors are multiplied to give a product, here deemed LigXRec, to quantify the level of interactions between clusters. Due to these values differing based on treatment, we use these LigXRec values going forward as quantifiable indicators of change in cell-cell interaction within and between our tissue subsets.
At the top of the algorithm is a 'Constants' heading where users should input strings for treatment and adata tissue subset as notated by the comments. For the purposes of this experiment, the entire algorithm is encapsulated within a FOR loop that allows for the comparison of every possible cluster interaction. If users are interested in the interaction between a specific cell cluster and the rest of the adata set, then they should edit either the iCellType (ligand) or jCellType (receptor) variables with the cluster_name variable in question under code chunk 9 inside the 'subCompCellLigRecPairs' function. These cluster variables are notated with comments. However, if users are only interested in the interaction between two specific clusters, then the top FOR loop can be deleted entirely while the iCellType ligand variable and jCellType receptor variable can be altered in code chunk 9 as needed.
Primary LigXRec Interaction Algorithm and pygenomicstools.py Author: Joseph Zhou (joseph.zhou@novartis.com)
NOTE: Make sure to have file 'pygenomicstools.py' in present wd. This process is computationally intensive and requires many hours. If running for all clusters and both treatments, set aside 2-3 days in order to fully complete calculations on a typical local workstation. For comparison, these results were generated on a Macbook Pro with 16GB RAM and a 2.9 GHz quad-core CPU.
# instructions:
# 1. Into pwd, put in folder titled ZLP020_scCellNet. Add to it folders "Data" and "Results".
# In Data, add the ligand-receptor .csv file (should have 2 columns). Put pygenomicstools.py in the pwd
# 2. Alter paths and file names in code chunks 10,11,13
# 3. Determine what to compare with code from code chunk 12:
# adata_interactome.obs = adata_interactome.obs.rename(columns = {'leiden': 'cluster'})
# leiden is the .obs value being compared. Can't be an integer value
# 4. Determine what treatment group to compare in code chunk 12:
# pTreatment = 'Iso' will mean that all Iso samples will be compared
# 5. In code chunk 9, alter the obs.X value by which to do comparisons from two separate times:
# for example, setting iCellType = 'two' and jCellType = 'two' means that leiden cluster 'two' will be compared to all other clusters
# 6. Run code chunks 1-13
# 7. Run the program using the code block after code chunk 13
# Make folders of each cluster for adataType LigXRec folders (only need to run once)
# Ligand-receptor pairs data - INSERT 'ligand_receptor_pairs_all_vNATMI.csv/.txt' files into this folder
data_path = os.path.join('ZLP020_scCellNet', 'Data')
os.makedirs(data_path)
# LigXRec results directories for each cluster
results_path = os.path.join('ZLP020_scCellNet', 'Results')
adata_types = ['fatT', 'fatC', 'onlyT', 'allT']
for adata in adata_types:
for cluster in cluster_names:
os.makedirs(os.path.join(results_path, 'LigXRec_interactions_' + adata, cluster, ''))
# Code chunk 1 - import Python packages and configure the initial parameters
import time
import timeit
# from graphviz import Digraph # need to run "pip --proxy https://nibr-proxy.global.nibr.novartis.net:2011 install graphviz" in the terminal before
import multiprocessing as mp
from functools import partial
from tqdm import tqdm
import gc
import pygenomicstools as pgt # make sure pygenomicstools.py is in pwd
# Calculate all ligandXreceptor cluster interactions - consider commenting out chunks 2 and 3 and graphviz package requirement above
## DO NOT RUN CODE CHUNKS 2 & 3
# Constants - Modify as needed while computing
TREATMENT = 'PD1' # type either 'Iso' or 'PD1'
ADATA_TYPE = 'onlyT' # type adata set ('fatT', 'fatC', 'onlyT', or 'allT')
for cluster in cluster_names:
# # Code chunk 2 - Plot cell-cell interaction network from defined top pairs of ligand-receptors
# # note - networkx package is NOT beautiful
# def draw_cellNet_Label(celltype_names, graph, fig_name, labels, colors):
# # create graphviz graph
# G = Digraph(fig_name, engine="dot")
# G.attr(rankdir='LR', size='10,8')
# # add nodes
# G.attr('node', shape='oval')
# for inode, iColor in zip(celltype_names, colors):
# G.node(inode, style="rounded,filled,bold", fillcolor=iColor)
# # add edges and label
# for edge, ilabel in zip(graph, labels):
# G.edge(edge[0], edge[1], label=ilabel)
# # display the figure
# return (G)
# # Code chunk 3 - List cell types in top ligand/receptor pairs
# def plot_cell_interaction_net(ligRecInteraction, fig_name):
# if ligRecInteraction.empty:
# print('No ligand-receptor interactions selected!')
# else:
# celltype_names = ligRecInteraction.iloc[:, 2:4].unstack().drop_duplicates().values.tolist()
# dfEdge = ligRecInteraction.iloc[:, 2:4]
# dfEdge.head()
# graph_celltype = [tuple(x) for x in dfEdge.values]
# # create ligand /receptor pairs as labels
# dfLigRec_sel = ligRecInteraction.iloc[:, 0:2]
# graph_label = dfLigRec_sel
# graph_label.head()
# # ligRecLabel
# nPairs = ligRecInteraction.shape[0]
# ligRecLabel = []
# for i in range(0, nPairs):
# y = '-'.join(graph_label.iloc[i, :].tolist())
# ligRecLabel.append(y)
# colors = ['#d38d8d', '#d3ae8d', '#d3ae8d', '#cbd38d', '#b2d38d', '#8dd3c0', '#8db1d3', '#908dd3', '#af8dd3', '#d38dc4', '#d38dc4', '#ab1616', '#ab9c16', '#52ab16', '#16ab90', '#1643ab', '#ab1697', '#ab5e16']
# # plot the interaction figure
# G = draw_cellNet_Label(celltype_names, graph_celltype, fig_name, ligRecLabel, colors)
# G.render(fig_name, view=True)
# Code chunk 4
def readSCData(inputOCFile, ligRecDBFile):
""" Read single cell data and ligand-receptor DB, filtering tanscriptome to lig/rec only"""
## read single-cell Ovary data
OCMix = sc.read_h5ad(inputOCFile)
### Bob edit 4/1/20 at 12:27 PM. RIKEN or cellphonedb are not in my ligRecDBFile name
# Read ligand-Receptors
#if 'RIKEN' in ligRecDBFile:
# genePairIn = pd.read_csv(ligRecDBFile, header=None)
# genePairIn = genePairIn.drop([0], axis=1)
# genePairIn.columns = ["ligand", "receptor"]
# Read cellphonedb from cellphonedb.org
#if 'cellphonedb' in ligRecDBFile:
# genePairIn = parseCellPhoneDB(ligRecDBFile)
genePairIn = pd.read_csv(ligRecDBFile, header=None)
# genePairIn = genePairIn.drop([2,3,4,5,6], axis=1) # get rid of extra columns if they are present
genePairIn.columns = ["ligand", "receptor"]
# provide a list of unique pairs of ligand and receptor
ligandList = genePairIn["ligand"].unique().tolist()
receptorList = genePairIn["receptor"].unique().tolist()
ligRecGenes = ligandList + receptorList
# Check ligand-receptor pairs are in scRNASeq data
geneInRNASeq = pgt.unique(OCMix.var.index.values)
gene_exist = pgt.intersect(ligRecGenes, geneInRNASeq)
selM = genePairIn["ligand"].isin(gene_exist) & genePairIn["receptor"].isin(gene_exist)
genePairIn_exist = genePairIn.loc[selM, :]
# Filter the gene expression matrix of Ligand /Receptors
geneInLigRec = OCMix.var.index.isin(ligRecGenes)
ocInteractome = OCMix[:, geneInLigRec]
ocInteractome.var_names = ocInteractome.var.index.values
ocInteractome.obs_names = ocInteractome.obs.index.values
OCMix = None
gc.collect()
return (ocInteractome, genePairIn_exist, ligandList)
# Code chunk 5
def parseCellPhoneDB(ligRecDBFile):
''' Parse cellphonedb data to ligand-receptor dataFrame'''
# dbpath = '/local2/singleCellProjects/Projects/HLP-003-OC_scCellNet/Data/refDB/'
# ligRecDBFile = dbpath + 'interactions_cellphonedb.csv'
# Read ligand-Receptors
#pd.set_option('display.max_columns', 30)
genePairIn = pd.read_csv(ligRecDBFile, header=None)
genePairIn.head()
genePairIn.shape
genePairIn.columns = genePairIn.iloc[0,:]
genePairIn = genePairIn.iloc[1:]
genePairIn.head()
sel = genePairIn.partner_a.str.contains('simple') & genePairIn.partner_b.str.contains('simple')
ligRecDF = genePairIn.loc[sel,:]
ligRecDF = ligRecDF.iloc[:,[4,5]]
ligRecDF.columns = ['ligand','receptor']
ligRecDF['ligand'] = ligRecDF['ligand'].map(lambda x: x.partition('_')[0])
ligRecDF['receptor'] = ligRecDF['receptor'].map(lambda x: x.partition('_')[0])
print(ligRecDF.head())
ligRecDF.shape
return ligRecDF
# Code chunk 6
def subCompLigRecInfo(cellInteractome, ligandList, iCellType, popMean, popSD, outPath, outLRFile,fielApp=0):
"""subfunction for parallel computing ligand / receptor statistical info in each cell type """
# Debug
# cellInteractome = ocInteractome
# iCellType = cellInteractome.obs.cluster[0]
# extract gene and cell info from annData: cellInteractome
# popLigRecGenes = cellInteractome.var_names
# len(popLigRecGenes)
# Data structure for this cell type
oneCellTypeIDs = cellInteractome.obs.cluster == iCellType
selDfLigRec = cellInteractome[oneCellTypeIDs, :]
# selDfLigRec.shape
cellRatio = selDfLigRec.shape[0] / cellInteractome.shape[0]
nCell = selDfLigRec.shape[0]
# define ligRecInfo data frame
ligRecGenes = selDfLigRec.var_names
nLigRec = len(ligRecGenes)
ligRecInfo = pd.DataFrame({"LigRecGeneID": '',
"geneType": '',
"cellType": '',
"geneExpMean": 0.0,
"geneExpSD": 0.0,
"cellNum": 0,
"cellRatio": 0.0,
"nonzeroRatio": 0.0,
"ZScore": np.repeat(0.0, nLigRec)})
# Loop over all ligRec genes
for i in range(0, nLigRec):
iLigRec = ligRecGenes[i]
ligRecInfo.loc[i, "LigRecGeneID"] = iLigRec
if (np.count_nonzero(selDfLigRec[:, iLigRec].X) > 0):
ligRecInfo.loc[i, "cellType"] = iCellType
if iLigRec in ligandList:
ligRecInfo.loc[i, "geneType"] = "Ligand"
else:
ligRecInfo.loc[i, "geneType"] = "Receptor"
try:
xMean = selDfLigRec[:, iLigRec].X.mean() # annData[cellIDs, geneIDs] no .loc
xSD = selDfLigRec[:, iLigRec].X.std()
nonZero = np.count_nonzero(selDfLigRec[:, iLigRec].X) / selDfLigRec.shape[0] # non-zero cell#
ligRecInfo.loc[i, "geneExpMean"] = xMean
assert isinstance(xSD, object)
ligRecInfo.loc[i, "geneExpSD"] = xSD
ligRecInfo.loc[i, "cellNum"] = nCell
ligRecInfo.loc[i, "cellRatio"] = cellRatio
ligRecInfo.loc[i, "nonzeroRatio"] = nonZero
# Calculate ZScore
if np.isnan(popSD[i]) | np.isnan(popMean[i]) | (popSD[i] < 1e-6):
ZScore = 0.0
else:
ZScore = (xMean - popMean[i]) / popSD[i]
if np.isnan(ZScore):
ligRecInfo.loc[i, "ZScore"] = 0.0
else:
ligRecInfo.loc[i, "ZScore"] = ZScore
except ValueError as e:
print(e)
print(selDfLigRec[:, iLigRec].X)
ligRecInfo = ligRecInfo.loc[ligRecInfo.nonzeroRatio > 0, :] # remove genes are zero in all cells
print(ligRecInfo.head())
# write to a file
outLRFile = outPath + outLRFile
if fielApp == 0:
print('initial file writing')
ligRecInfo.to_csv(outLRFile, index=None, header=True)
if fielApp ==1:
with open(outLRFile, 'a') as f:
ligRecInfo.to_csv(f, index=None, header=False)
gc.collect()
return None
# Code chunk 7
def compligRecInfo(cellInteractome, ligandList, nCPU, outPath, outLRFile):
"""Main function to caculate ligand / receptor info in each cell type"""
# extract cell type data
cellTypes = cellInteractome.obs.cluster.unique()
##############################################
# serial computing version
popMean = cellInteractome.X.T.mean(axis=1)
popSD = cellInteractome.X.T.std(axis=1)
print('cell type: ' + cellTypes[0])
subCompLigRecInfo(cellInteractome, ligandList, cellTypes[0], popMean, popSD,
outPath, outLRFile, 0)
for iCellType in tqdm(cellTypes[1:]):
print('cell type: ' + iCellType)
subCompLigRecInfo(cellInteractome, ligandList, iCellType, popMean, popSD,
outPath, outLRFile, 1)
print('LigRecInfo computation finished. File is stored at: \n')
print(outPath+outLRFile)
# dfLigRecInfo = pd.DataFrame()
#
# for iCellType in tqdm(cellTypes):
# pLigRecInfo = subCompLigRecInfo(cellInteractome, ligandList, iCellType)
# dfLigRecInfo = dfLigRecInfo.append(pLigRecInfo)
# pLigRecInfo = None
# gc.collect()
# Sort by the product of liang and receptor
# ligRecInteraction = ligRecInteraction.sort_values(by='LigXRec', ascending=False)
##############################################
# # Parallel version
# # set function's locked parameters
# # dfLigRecInfo = subCompLigRecInfo(cellInteractome, ligandList, cellTypes[0])
# CompCellLigRec = partial(subCompLigRecInfo, cellInteractome, ligandList)
# # Parallel computing using Pool Map
# try:
# with mp.Pool(processes=nCPU) as pool:
# pLigRecInfo = pool.map(CompCellLigRec, cellTypes)
#
# # concat the list of pd dataframe
# dfLigRecInfo = pd.concat(pLigRecInfo)
# except ValueError as e:
# print(e)
return None
# Code chunk 8
def compIntegralLigRec(cellInteractome):
""" Compute integral of ligand and receptor distribution with Ligand saturature"""
# Code chunk 9
def subCompCellLigRecPairs(LigRecPair, LigRecInfo, nCellType, xCellType):
""" SubFunction to parallelize computing Ligand/Receptor product """
# Define dataFrame
ligRecInteraction = pd.DataFrame({"LigGeneID": '',
"RecGeneID": '',
"cellTypeLig": '',
"cellTypeRec": '',
"cellExpRatioLig": 0.0,
"cellExpRatioRec": 0.0,
"IntLigRec": 0.0,
"LigXRec": 0.0,
"IntegralLigRec": 0.0,
"LigRecZScore": np.repeat(0.0, 2 * nCellType)})
# Loop over ligand /receptor pairs
# print(LigRecPair.shape)
### run every cell type vs every cell type
### choose certain cell type interested in to save time
for i, xLigRecPair in LigRecPair.iterrows(): # iterate through each row
# cancer cell with Ligand
# print('pair No.'+str(i))
# print(xLigRecPair)
iCellType = cluster ### INSERT LIGAND CLUSTER HERE ###
jCellType = xCellType
# select Ligand
selLig = (LigRecInfo.LigRecGeneID == xLigRecPair[0]) & (LigRecInfo.cellType == iCellType)
# select Receptor
selRec = (LigRecInfo.LigRecGeneID == xLigRecPair[1]) & (LigRecInfo.cellType == jCellType)
if (np.count_nonzero(selLig) * np.count_nonzero(selRec) ):
ligRecInteraction.loc[2*i, "LigGeneID"] = xLigRecPair[0]
ligRecInteraction.loc[2*i, "RecGeneID"] = xLigRecPair[1]
ligRecInteraction.loc[2*i, "cellTypeLig"] = iCellType
ligRecInteraction.loc[2*i, "cellTypeRec"] = jCellType
ligRecInteraction.loc[2*i, "cellExpRatioLig"] = LigRecInfo.loc[selLig, "cellRatio"].values
ligRecInteraction.loc[2*i, "cellExpRatioRec"] = LigRecInfo.loc[selRec, "cellRatio"].values
ligRecInteraction.loc[2*i, "LigXRec"] = float(LigRecInfo.loc[selLig, "geneExpMean"]) * \
float(LigRecInfo.loc[selRec, "geneExpMean"])
ligRecInteraction.loc[2*i, "LigRecZScore"] = float(LigRecInfo.loc[selLig, "ZScore"]) * \
float(LigRecInfo.loc[selRec, "ZScore"])
# tmp = float(LigRecInfo.loc[selLig, "geneExpMean"]) * float(LigRecInfo.loc[selRec, "geneExpMean"])
# print(tmp)
# cancer cell with receptor
iCellType = xCellType
jCellType = cluster ### INSERT RECEPTOR CLUSTER HERE ###
# select Ligand
selLig = (LigRecInfo.LigRecGeneID == xLigRecPair[0]) & (LigRecInfo.cellType == iCellType)
# select Receptor
selRec = (LigRecInfo.LigRecGeneID == xLigRecPair[1]) & (LigRecInfo.cellType == jCellType)
# print(sum(selLig))
# print(sum(selRec))
if (np.count_nonzero(selLig) * np.count_nonzero(selRec) ):
ligRecInteraction.loc[2*i+1, "LigGeneID"] = xLigRecPair[0]
ligRecInteraction.loc[2*i+1, "RecGeneID"] = xLigRecPair[1]
ligRecInteraction.loc[2*i+1, "cellTypeLig"] = iCellType
ligRecInteraction.loc[2*i+1, "cellTypeRec"] = jCellType
ligRecInteraction.loc[2*i+1, "cellExpRatioLig"] = LigRecInfo.loc[selLig, "cellRatio"].values
ligRecInteraction.loc[2*i+1, "cellExpRatioRec"] = LigRecInfo.loc[selRec, "cellRatio"].values
ligRecInteraction.loc[2*i+1, "LigXRec"] = float(LigRecInfo.loc[selLig, "geneExpMean"]) * \
float(LigRecInfo.loc[selRec, "geneExpMean"])
ligRecInteraction.loc[2*i+1, "LigRecZScore"] = float(LigRecInfo.loc[selLig, "ZScore"]) * \
float(LigRecInfo.loc[selRec, "ZScore"])
# print(ligRecInteraction.loc[2*i+1, "LigXRec"])
# filter zero
ligRecInteraction = ligRecInteraction.loc[ligRecInteraction.LigXRec > 0, :]
print(ligRecInteraction.head())
return ligRecInteraction
# Code chunk 10
def compCellCellInteraction(cellTypes, genePairIn_exist, topPairs, nCPU, outPath, outLRFile):
'''Calculate Cell interaction network by computing Ligand-Receptor cell-cell gene expression Product'''
## Choose each cell type topPairs lig-rec, decrease total computing time
# read ligand Receptor pairs
dfligRecInfo = pd.read_csv(outPath + outLRFile, index_col=None)
print('Check reading LigRec info OK?')
print(dfligRecInfo.head())
if topPairs > 101: # if more than 100, just return everything
genePairIn_top = genePairIn_exist
else:
topGenelist = set()
for iCell in cellTypes:
x = dfligRecInfo.loc[dfligRecInfo.cellType == iCell, ]
x = x.sort_values(by='geneExpMean', ascending=False).head(topPairs)
x = x.LigRecGeneID
topGenelist = topGenelist.union(set(x))
sel = genePairIn_exist.ligand.isin(topGenelist) | genePairIn_exist.receptor.isin(topGenelist)
genePairIn_top = genePairIn_exist.loc[sel, :]
print(genePairIn_top.head)
print(genePairIn_top.shape)
##############################################
# serial computing version
ligRecInteraction = pd.DataFrame()
nCellType = len(cellTypes)
for iCellType in tqdm(cellTypes):
pligRecInteraction = subCompCellLigRecPairs(genePairIn_top, dfligRecInfo, nCellType, iCellType)
ligRecInteraction = ligRecInteraction.append(pligRecInteraction)
pligRecInteraction = None
gc.collect()
# Sort by the product of ligand and receptor
ligRecInteraction = ligRecInteraction.sort_values(by='LigXRec', ascending=False)
##############################################
# Parallel computing version
# Time cosuming option: choose all pairs
# genePairIn_top = genePairIn_exist
# set function's locked parameters
# subCompCellLigRecPairs(cellTypes, LigRecInfo, xLigRecPair)
# nCellType = len(cellTypes)
# CompLigRecPairs = partial(subCompCellLigRecPairs, genePairIn_top, dfligRecInfo, nCellType)
#
# # test = subCompCellLigRecPairs(genePairIn_top, dfligRecInfo, nCellType, cellTypes[0])
# # test.head()
# # ligRecInteraction = test
#
# # Parallel computing using pool
# try:
# with mp.Pool(processes=nCPU) as pool:
# pligRecInteraction = pool.map(CompLigRecPairs, cellTypes)
#
# # concat the list of pd dataframe
# ligRecInteraction = pd.concat(pligRecInteraction)
# ligRecInteraction = ligRecInteraction.sort_values(by='LigXRec', ascending=False)
#
# except ValueError as e:
# print(e)
# print(ligRecInteraction.head())
outPath = results_path # os.path.join('ZLP020scCellNet', 'Results')
outputFile = os.path.join(outPath, 'testCellNet_any2Cancer.csv')
ligRecInteraction.to_csv(outputFile)
return ligRecInteraction
# Code chunk 11
def compOnePatientCellNet(adata_interactome, topPairs, nCPU,
genePairIn_exist, ligandList,outPath,outLRFile):
"""Compute and plot cell interaction network for one or one group of patients"""
# get cell types
cellTypes = adata_interactome.obs.cluster.unique()
# compute LigRecInfo.locr for each ligand /receptor in each cell group:
# each gene, each cell type, gene expression level
print('Computing ligand-Receptor Info ...')
#dfligRecInfo = compligRecInfo(adata_interactome, ligandList, nCPU, outPath, outLRFile)
compligRecInfo(adata_interactome, ligandList, nCPU, outPath, outLRFile)
# compute ligXRec for each pair of ligand/receptor between each pair of cell types
print('Computing Cell ligand-Receptor interactions...')
# ligRecInteraction = compCellCellInteraction(cellTypes, genePairIn_exist,
# dfligRecInfo, topPairs, nCPU)
# read file of ligand and receptor
ligRecInteraction = compCellCellInteraction(cellTypes, genePairIn_exist,
topPairs, nCPU, outPath, outLRFile)
# Sort by ligand / Receptor product
ligRecInteractionTop = ligRecInteraction.sort_values(by='LigXRec', ascending=False)
# filter out cancer to itself
ligRecInteractionTop = ligRecInteractionTop.loc[ligRecInteractionTop.cellTypeLig != ligRecInteractionTop.cellTypeRec, ]
outPath = results_path # os.path.join('ZLP020_scCellNet', 'Results')
outputFile = os.path.join(results_path, 'testCellNet_Top.csv')
ligRecInteractionTop.to_csv(outputFile)
return ligRecInteractionTop
# Code chunk 12
def compCellNet(pShort, topPairs, nCPU, cellRatioSig, geneExpSig,
ligRecDBFile, inputOCFile, outPath, outLRFile):
'''Compute and plot cell interaction network for one group of patients'''
# read single-cell data
[adata_interactome, genePairIn_exist, ligandList] = readSCData(inputOCFile, ligRecDBFile)
print('Reading data...')
print(adata_interactome)
print(adata_interactome.obs.head())
print(adata_interactome.var_names.shape)
gc.collect()
# adata_interactome.obs = adata_interactome.obs.rename(columns = {'cell_type_per_leiden': 'cluster'})
adata_interactome.obs = adata_interactome.obs.rename(columns = {'cluster_name': 'cluster'}) ## CHANGED 'cell_type' TO 'cluster_name'
# Compute cancer cells treated vs. untreated
pTreatment = TREATMENT # constant defined at top ('Iso' or 'PD1' treated groups)
adata_untreated = adata_interactome[adata_interactome.obs['Treatment'] == pTreatment,:]
ligRecInteractionTop = compOnePatientCellNet(adata_untreated, topPairs, nCPU,
genePairIn_exist, ligandList, outPath, outLRFile)
### run another round with aPD1 too
adata_untreated = None
## visualize the cell-cell interaction network
ligRecInteractionTop.shape
print(ligRecInteractionTop.head())
# print('Ploting cell interaction net...')
# fig_name = outPath + pShort + 'cellNet_' + pTreatment
# plot_cell_interaction_net(ligRecInteractionTop, fig_name)
outputFile = os.path.join(results_path, 'LigXRec_interactions_' + ADATA_TYPE, cluster, cluster + '_' + pTreatment + '.csv')
ligRecInteractionTop.to_csv(outputFile)
# List cell types in top ligand/receptor pairs
# plot_scatter(ligRecInteraction, cellLigRecMat_filtered, topPairs, fig_name, path)
# Code chunk 13
def main():
# Define data path and data filenames
# Define file pathes and dbfiles
# data_path = os.path.join('ZLP020_scCellNet', 'Data')
# results_path = os.path.join('ZLP020_scCellNet', 'Results')
path = 'adata_steps'
dbpath = data_path
inputDataFile = os.path.join(path, 'expB_v1_step7_'+ ADATA_TYPE +'.h5ad') # input data of saved adata subset
outPath = os.path.join(results_path, 'LigXRec_interactions_' + ADATA_TYPE, cluster, '') # outputs for each cluster
outLRFile = 'LigRecCellInfo.csv'
# Pandas display columns
pd.set_option('display.max_columns', 30)
########################################################################################
# Parameters to change and debug
# ligRecDBFile = dbpath + 'Ligand_Receptor_RIKEN_UCLA_bigDB.csv' # lig-rec DB from FANTOM 5
# pDB = 'FANTOM5'
ligRecDBFile = os.path.join(dbpath, 'ligand_receptor_pairs_all_vNATMI.csv') # list of ligand-receptor gene pairs
pDB = cluster + '_'
# Cut-off to choose significantly highly expressed ligand-receptor pairs
cellRatioSig = 0.05
geneExpSig = 0.01
topPairs = 100 # 100 returns entire table
# Number of CPUs for parallel computing
nCPU = 4
## timing
start_time = timeit.default_timer()
print('Computing......')
########################################################################################
# Compute cell interaction network
compCellNet(pDB, topPairs, nCPU, cellRatioSig, geneExpSig,
ligRecDBFile, inputDataFile, outPath, outLRFile)
# for i in tqdm(range(10000)):
# pass
elapsed = timeit.default_timer() - start_time
print('Total running time:')
print(elapsed)
# END OF PROGRAM
# run program
if __name__ == "__main__":
main()
The LigXRec interaction results from every cluster for both treatments are separated into dataframes in which they act as the ligand or receptor in an interaction in the specified treatment. These dataframes are combined to make one table containing every interaction a cluster shows in each treatment. These tables are then merged based on like interactions to give differences in LigXRec values (aPD1 - Iso) between clusters between each ligand-receptor gene pair. All clusters' LigXRec information dataframes are then merged to give an overview of each tissue subtype's LigXRec interactions.
# Function to create LigXRec difference dataframes (DFs) for individual clusters
result_path = os.path.join('ZLP020_scCellNet', 'Results', 'LigXRec_interactions_')
def cluster_ligxrec_dfs(adata_type):
for cluster in cluster_names:
# Ligand Isotype edited DF
iso_ligand_df = pd.read_csv(os.path.join(result_path + adata_type, cluster, cluster + '_Iso.csv'), index_col=[0])
iso_ligand_df = iso_ligand_df[iso_ligand_df['cellTypeLig'] == cluster]
iso_ligand_df = iso_ligand_df.drop(columns=['IntLigRec', 'cellExpRatioLig', 'cellExpRatioRec', 'IntegralLigRec', 'LigRecZScore'])
# Receptor Isotype edited DF
iso_receptor_df = pd.read_csv(os.path.join(result_path + adata_type, cluster, cluster + '_Iso.csv'), index_col=[0])
iso_receptor_df = iso_receptor_df[iso_receptor_df['cellTypeRec'] == cluster]
iso_receptor_df = iso_receptor_df.drop(columns=['IntLigRec', 'cellExpRatioLig', 'cellExpRatioRec', 'IntegralLigRec', 'LigRecZScore'])
# Concatenate edited isotype ligand and receptor DFs, add columns, and save to csv
full_iso_df = pd.concat([iso_ligand_df, iso_receptor_df], ignore_index=True)
full_iso_df['Treatment'] = 'Iso'
full_iso_df['adata_type'] = adata_type
full_iso_df['Lig-Rec Gene Pairs'] = np.nan
full_iso_df['Lig-Rec Cell Pairs'] = np.nan
try:
full_iso_df['Lig-Rec Gene Pairs'] = full_iso_df.apply(lambda row: row.LigGeneID + '-' + row.RecGeneID, axis=1)
full_iso_df['Lig-Rec Cell Pairs'] = full_iso_df.apply(lambda row: row.cellTypeLig + '-' + row.cellTypeRec, axis=1)
except ValueError:
pass
full_iso_df.to_csv(os.path.join(result_path + adata_type, cluster, cluster+'_iso_LigXRec_DF.csv'), index=False)
# Ligand PD1 edited DF
PD1_ligand_df = pd.read_csv(os.path.join(result_path + adata_type, cluster, cluster + '_PD1.csv'), index_col=[0])
PD1_ligand_df = PD1_ligand_df[PD1_ligand_df['cellTypeLig'] == cluster]
PD1_ligand_df = PD1_ligand_df.drop(columns=['IntLigRec', 'cellExpRatioLig', 'cellExpRatioRec', 'IntegralLigRec', 'LigRecZScore'])
# Receptor PD1 edited DF
PD1_receptor_df = pd.read_csv(os.path.join(result_path + adata_type, cluster, cluster + '_PD1.csv'), index_col=[0])
PD1_receptor_df = PD1_receptor_df[PD1_receptor_df['cellTypeRec'] == cluster]
PD1_receptor_df = PD1_receptor_df.drop(columns=['IntLigRec', 'cellExpRatioLig', 'cellExpRatioRec', 'IntegralLigRec', 'LigRecZScore'])
# Concatenate edited PD1 ligand and receptor DFs, add columns, and save to csv
full_PD1_df = pd.concat([PD1_ligand_df, PD1_receptor_df], ignore_index=True)
full_PD1_df['Treatment'] = 'PD1'
full_PD1_df['adata_type'] = adata_type
full_PD1_df['Lig-Rec Gene Pairs'] = np.nan
full_PD1_df['Lig-Rec Cell Pairs'] = np.nan
try:
full_PD1_df['Lig-Rec Gene Pairs'] = full_PD1_df.apply(lambda row: row.LigGeneID + '-' + row.RecGeneID, axis=1)
full_PD1_df['Lig-Rec Cell Pairs'] = full_PD1_df.apply(lambda row: row.cellTypeLig + '-' + row.cellTypeRec, axis=1)
except ValueError:
pass
full_PD1_df.to_csv(os.path.join(result_path + adata_type, cluster, cluster + '_PD1_LigXRec_DF.csv'), index=False)
# Merge edited isotype and PD1 DFs
full_df = full_iso_df.merge(full_PD1_df,
left_on=['LigGeneID', 'RecGeneID', 'cellTypeLig', 'cellTypeRec',
'adata_type', 'Lig-Rec Gene Pairs', 'Lig-Rec Cell Pairs'],
right_on=['LigGeneID', 'RecGeneID', 'cellTypeLig', 'cellTypeRec',
'adata_type','Lig-Rec Gene Pairs', 'Lig-Rec Cell Pairs'],
suffixes=('_Iso', '_PD1'))
# Calculate difference, fold change (fc), and Log2FC of LigXRec between aPD-1 and Isotype treatments
full_df['LigXRec Diff (PD1-Iso)'] = full_df['LigXRec_PD1'] - full_df['LigXRec_Iso']
full_df = full_df.sort_values(by=['LigXRec Diff (PD1-Iso)'], ascending=False)
fc = [(full_df['LigXRec_PD1'].iloc[i] / full_df['LigXRec_Iso'].iloc[i]) for i in range(len(full_df))]
full_df['Fold Change (PD1/Iso)'] = fc
log2fc = [math.log2(fc[i]) for i in range(len(fc))]
full_df['Log2(FC)'] = log2fc
# Save to .csv
full_df.to_csv(os.path.join(result_path + adata_type, cluster, cluster + '_diff_LigXRec_DF.csv'), index=False)
# Combine all individual cluster DFs into DFs encompassing entire adata_type for each treatment
def combined_adata_type_dirs(adata_type):
# Create lists of every cluster DF dir. for each treatment
all_iso_clusters_dirs = [os.path.join(result_path + adata_type, cluster, cluster + '_iso_LigXRec_DF.csv') for cluster in cluster_names]
all_PD1_clusters_dirs = [os.path.join(result_path + adata_type, cluster, cluster + '_PD1_LigXRec_DF.csv') for cluster in cluster_names]
combined_dirs = [os.path.join(result_path + adata_type, cluster, cluster + '_diff_LigXRec_DF.csv') for cluster in cluster_names]
# Add each to make list containing each dir. list
list_of_dirs = []
list_of_dirs.append(all_iso_clusters_dirs)
list_of_dirs.append(all_PD1_clusters_dirs)
list_of_dirs.append(combined_dirs)
# Concatenate each treatment list into one DF for each adata_type treatment
for dir_list in list_of_dirs:
treatment_df = pd.DataFrame()
for filename in dir_list:
file_df = pd.read_csv(filename, index_col=None, header=0)
treatment_df = treatment_df.append(file_df, ignore_index=True)
treatment_df = treatment_df.drop_duplicates()
# Sort LigXRec values and save .csv tables based on treatment
if 'iso' in dir_list[0]:
treatment_df = treatment_df.sort_values(by=['LigXRec'], ascending=False)
treatment_df.to_csv(os.path.join(result_path + adata_type, 'all_clusters_iso_LigXRec.csv'), index=False)
elif 'PD1' in dir_list[0]:
treatment_df = treatment_df.sort_values(by=['LigXRec'], ascending=False)
treatment_df.to_csv(os.path.join(result_path + adata_type, 'all_clusters_PD1_LigXRec.csv'), index=False)
elif 'diff' in dir_list[0]:
treatment_df = treatment_df.sort_values(by=['LigXRec Diff (PD1-Iso)'], ascending=False)
treatment_df.to_csv(os.path.join(result_path + adata_type, 'all_clusters_combined_LigXRec.csv'), index=False)
# Create DFs and .csv files of individual cluster and combined LigXRec interactions data
def LigXRec_files_main(adata_type):
cluster_ligxrec_dfs(adata_type)
combined_adata_type_dirs(adata_type)
LigXRec_files_main('fatT')
LigXRec_files_main('fatC')
LigXRec_files_main('onlyT')
# LigXRec_files_main('allT')
# Save progress
adataFatT.write(os.path.join('adata_steps', 'expB_step8_fatT_LigXRec_tables.h5ad'))
adataFatC.write(os.path.join('adata_steps', 'expB_step8_fatC_LigXRec_tables.h5ad'))
adataOnlyT.write(os.path.join('adata_steps', 'expB_step8_onlyT_LigXRec_tables.h5ad'))
# adataAllT.write(os.path.join('adata_steps', 'expB_step8_allT_LigXRec_tables.h5ad'))
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
In order to compare the levels of ligand and receptor gene expression levels based on iso/aPD-1 treatment while accounting for different numbers of cells in each cluster, we created DFs for each cluster which contained cells unique to each cluster along with the raw expression levels of every gene in the adata set. The cluster expression levels of each gene are then averaged to show the overall relative magnitude of ligand and receptor genetic expression in each cluster depending on treatment. This information can be used when examining the importance of LigXRec interaction results as users should be aware that high LigXRec values may not necessarily indicate large numbers of interactions between clusters. For example, relatively low ligand or receptor score values are indicators of low genetic expression, but a high corresponding LigXRec value can indicate notable levels of ligand-receptor bonding when interactions do occur.
# Create folder for raw gene scores (only need to do once)
os.mkdir('Iso_PD1_raw_gene_scores')
# Create dictionary with raw gene expression DFs and adata of each cluster
def cluster_genes_dict(adata_type):
clusters_dict = {}
for cluster in cluster_names:
cluster_adata = adata_type[adata_type.obs['cluster_name'].isin([cluster])]
cluster_df = cluster_adata.to_df()
clusters_dict.update({cluster: [cluster_adata, cluster_df]})
return(clusters_dict)
# Create adata_type_dict with raw gene expression DFs
fatT_dict = cluster_genes_dict(adataFatT)
fatC_dict = cluster_genes_dict(adataFatC)
onlyT_dict = cluster_genes_dict(adataOnlyT)
# allT_dict = cluster_genes_dict(adataAllT)
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if not is_categorical(df_full[k]): /opt/anaconda3/envs/Python38/lib/python3.8/site-packages/pandas/core/arrays/categorical.py:2487: FutureWarning: The `inplace` parameter in pandas.Categorical.remove_unused_categories is deprecated and will be removed in a future version. res = method(*args, **kwargs)
onlyT_dict['tumor0'][0]
View of AnnData object with n_obs × n_vars = 7815 × 20737
obs: 'Barcode', 'library_id', 'molecule_h5', 'lab_sample_id', 'sample_name', 'Model', 'Type', 'Location', 'Treatment', 'Replicate', 'cell_fraction', 'Timepoint', 'n_genes', 'cell_type', 'percent_mito', 'percent_ribo', 'n_counts', 'leiden', 'Epcam', 'Pax7', 'lin_mark_present', 'doublet_scores', 'predicted_doublets', 'batch', 'cluster_name'
var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'Treatment_colors', 'cell_type_colors', 'cluster_name_colors', "dendrogram_['leiden']", 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap'
obsm: 'X_pca', 'X_umap'
# Define variables for DFs of each combined_adata_type treatment dir. location
result_path = os.path.join('ZLP020_scCellNet', 'Results', 'LigXRec_interactions_')
# FatT
fatT_iso_table = pd.read_csv(os.path.join(result_path + 'fatT', 'all_clusters_iso_LigXRec.csv'))
fatT_PD1_table = pd.read_csv(os.path.join(result_path + 'fatT', 'all_clusters_PD1_LigXRec.csv'))
# FatC
fatC_iso_table = pd.read_csv(os.path.join(result_path + 'fatC', 'all_clusters_iso_LigXRec.csv'))
fatC_PD1_table = pd.read_csv(os.path.join(result_path + 'fatC', 'all_clusters_PD1_LigXRec.csv'))
# OnlyT
onlyT_iso_table = pd.read_csv(os.path.join(result_path + 'onlyT', 'all_clusters_iso_LigXRec.csv'))
onlyT_PD1_table = pd.read_csv(os.path.join(result_path + 'onlyT', 'all_clusters_PD1_LigXRec.csv'))
# AllT
# allT_iso_table = pd.read_csv(os.path.join(result_path + 'allT', 'all_clusters_iso_LigXRec.csv'))
# allT_PD1_table = pd.read_csv(os.path.join(result_path + 'allT', 'all_clusters_PD1_LigXRec.csv'))
# Calculate ligand/receptor score totals based on expression levels of genes in each cluster
def cluster_adata_gene_exp(adata_type_dict, treatment_table, adata_type_treatment):
#treatment_table.reindex(index=range(len(treatment_table)))
treatment_table = treatment_table.reset_index(drop=True)
#treatment_table.index.astype(dtype='object')
treatment_table['Lig Score'] = np.nan
treatment_table['Rec Score'] = np.nan
starttime = time.time()
for cluster in tqdm(cluster_names):
print('Calculating Ligand/Receptor Scores for ' + cluster + '...')
# calculate average ligand/receptor scores
for row in range(len(treatment_table)):
# ligand cells/genes
ligand_cluster = treatment_table['cellTypeLig'][row]
ligand_gene = treatment_table['LigGeneID'][row]
ligand_score_total = 0
if ligand_cluster == cluster:
ligand_adata = adata_type_dict[cluster][0]
ligand_raw_gene_exp = adata_type_dict[cluster][1]
for cell_num in range(len(ligand_adata)):
ligand_score_total += float(ligand_raw_gene_exp[ligand_gene][cell_num])
try:
ligand_score_final = ligand_score_total / len(ligand_adata)
treatment_table['Lig Score'][row] = ligand_score_final
except ZeroDivisionError:
treatment_table['Lig Score'][row] = np.nan
else:
pass
# receptor cells/genes
receptor_cluster = treatment_table['cellTypeRec'][row]
receptor_gene = treatment_table['RecGeneID'][row]
receptor_score_total = 0
if receptor_cluster == cluster:
receptor_adata = adata_type_dict[cluster][0]
receptor_raw_gene_exp = adata_type_dict[cluster][1]
for cell_num in range(len(receptor_adata)):
receptor_score_total += float(receptor_raw_gene_exp[receptor_gene][cell_num])
try:
receptor_score_final = receptor_score_total / len(receptor_adata)
treatment_table['Rec Score'][row] = receptor_score_final
except ZeroDivisionError:
treatment_table['Rec Score'][row] = np.nan
else:
pass
treatment_table.to_csv(os.path.join('Iso_PD1_raw_gene_scores', adata_type_treatment + '_ligrec_scores.csv'), index=False)
print('FINISHED')
print(f'Total time elapsed: {time.time() - starttime} seconds')
return(treatment_table)
# Calculate lig/rec scores for each adata_type
fatT_iso_scores = cluster_adata_gene_exp(fatT_dict, fatT_iso_table, 'fatT_iso')
fatT_PD1_scores = cluster_adata_gene_exp(fatT_dict, fatT_PD1_table, 'fatT_PD1')
fatC_iso_scores = cluster_adata_gene_exp(fatC_dict, fatC_iso_table, 'fatC_iso')
fatC_PD1_scores = cluster_adata_gene_exp(fatC_dict, fatC_PD1_table, 'fatC_PD1')
onlyT_iso_scores = cluster_adata_gene_exp(onlyT_dict, onlyT_iso_table, 'onlyT_iso')
onlyT_PD1_scores = cluster_adata_gene_exp(onlyT_dict, onlyT_PD1_table, 'onlyT_PD1')
# allT_iso_scores = cluster_adata_gene_exp(allT_dict, allT_iso_table, 'allT_iso')
# allT_PD1_scores = cluster_adata_gene_exp(allT_dict, allT_PD1_table, 'allT_PD1')
# Save adata sets of ligand and receptor gene scores
adataFatT.write(os.path.join('adata_steps', 'expB_step9_fatT_ligrec_scores.h5ad'))
adataFatC.write(os.path.join('adata_steps', 'expB_step9_fatC_ligrec_scores.h5ad'))
adataOnlyT.write(os.path.join('adata_steps', 'expB_step9_onlyT_ligrec_scores.h5ad'))
# adataAllT.write(os.path.join('adata_steps', 'expB_step9_allT_ligrec_scores.h5ad'))
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
When examining the resulting ligand and receptor gene scores for each treatment, we then looked to construct DFs combining every cell-cell and gene-gene pair along with their corresponding isotype and anti-PD-1 LigXRec values in order to evaluate the difference that immunotherapy could have on these interactions in each tissue subset. To try and cut down on the number of interactions, we made a filter which only allowed for tables of ligand and receptor gene scores that are greater than 1 as we felt that these are genes which indicate expression levels notable enough to be seriously considered for the purposes of this analysis. However, we also performed the same analysis on the full, unfiltered dataset for comparison purposes. The greatest differences in LigXRec values between anti-PD-1 and isotype gene and cell pairs were then organized into tables and plotted on heatmaps for visual analysis.
# Read in generated lig-rec scores .csv files as DF variables if running again to save on time
fatT_iso_scores = pd.read_csv(os.path.join('Iso_PD1_raw_gene_scores', 'fatT_iso_ligrec_scores.csv'))
fatT_PD1_scores = pd.read_csv(os.path.join('Iso_PD1_raw_gene_scores', 'fatT_PD1_ligrec_scores.csv'))
fatC_iso_scores = pd.read_csv(os.path.join('Iso_PD1_raw_gene_scores', 'fatC_iso_ligrec_scores.csv'))
fatC_PD1_scores = pd.read_csv(os.path.join('Iso_PD1_raw_gene_scores', 'fatC_PD1_ligrec_scores.csv'))
onlyT_iso_scores = pd.read_csv(os.path.join('Iso_PD1_raw_gene_scores', 'onlyT_iso_ligrec_scores.csv'))
onlyT_PD1_scores = pd.read_csv(os.path.join('Iso_PD1_raw_gene_scores', 'onlyT_PD1_ligrec_scores.csv'))
# allT_iso_scores = pd.read_csv(os.path.join('Iso_PD1_raw_gene_scores', 'allT_iso_ligrec_scores.csv'))
# allT_PD1_scores = pd.read_csv(os.path.join('Iso_PD1_raw_gene_scores', 'allT_PD1_ligrec_scores.csv'))
# Filter ligand and receptor gene scores for those >1
def lig_rec_expression_filter(adata_iso_scores, adata_PD1_scores, filter_score, adata_type):
iso_expression_filter = ((adata_iso_scores['Lig Score'] > filter_score) & (adata_iso_scores['Rec Score'] > filter_score))
iso_scores_filter = adata_iso_scores[iso_expression_filter]
iso_scores_filter.to_csv(os.path.join('Iso_PD1_raw_gene_scores', adata_type + '_iso_scores_greater'+str(filter_score)+'.csv'), index=False)
PD1_expression_filter = ((adata_PD1_scores['Lig Score'] > filter_score) & (adata_PD1_scores['Rec Score'] > filter_score))
PD1_scores_filter = adata_PD1_scores[PD1_expression_filter]
PD1_scores_filter.to_csv(os.path.join('Iso_PD1_raw_gene_scores', adata_type + '_PD1_scores_greater'+str(filter_score)+'.csv'), index=False)
return(iso_scores_filter, PD1_scores_filter)
# Make DF variables for filtered lig-rec score tables
# FatT
fatT_iso_greater1, fatT_PD1_greater1 = lig_rec_expression_filter(fatT_iso_scores, fatT_PD1_scores, 1, 'fatT')
# FatC
fatC_iso_greater1, fatC_PD1_greater1 = lig_rec_expression_filter(fatC_iso_scores, fatC_PD1_scores, 1, 'fatC')
# # OnlyT
onlyT_iso_greater1, onlyT_PD1_greater1 = lig_rec_expression_filter(onlyT_iso_scores, onlyT_PD1_scores, 1, 'onlyT')
# # AllT
# allT_iso_greater1, allT_PD1_greater1 = lig_rec_expression_filter(allT_iso_scores, allT_PD1_scores, 1, 'allT')
# Merge iso/PD1 lig/rec score DFs, get LigXRec differences (specify if using Greater1 filter using filter_type argument)
def merge_lig_rec_scores(iso_scores_df, PD1_scores_df, adata_type, filter_type=''):
merged_scores = iso_scores_df.merge(PD1_scores_df,
left_on=['LigGeneID', 'RecGeneID', 'cellTypeLig', 'cellTypeRec', 'adata_type', 'Lig-Rec Gene Pairs', 'Lig-Rec Cell Pairs'],
right_on=['LigGeneID', 'RecGeneID', 'cellTypeLig', 'cellTypeRec', 'adata_type', 'Lig-Rec Gene Pairs', 'Lig-Rec Cell Pairs'],
suffixes=['_Iso', '_PD1'])
merged_scores = merged_scores.drop_duplicates()
merged_scores['LigXRec Diff (PD1-Iso)'] = merged_scores['LigXRec_PD1'] - merged_scores['LigXRec_Iso']
merged_scores = merged_scores.sort_values(by=['LigXRec Diff (PD1-Iso)'], ascending=False, ignore_index=True)
merged_scores.to_csv(os.path.join('Iso_PD1_raw_gene_scores', adata_type + '_merged_ligrec_scores' + filter_type + '.csv'), index=False)
return(merged_scores)
# Create merged iso/PD1 lig/rec score tables
fatT_merged_scores = merge_lig_rec_scores(fatT_iso_scores, fatT_PD1_scores, 'fatT')
fatT_merged_greater1 = merge_lig_rec_scores(fatT_iso_greater1, fatT_PD1_greater1, 'fatT', filter_type='_greater1')
fatC_merged_scores = merge_lig_rec_scores(fatC_iso_scores, fatC_PD1_scores, 'fatC')
fatC_merged_greater1 = merge_lig_rec_scores(fatC_iso_greater1, fatC_PD1_greater1, 'fatC', '_greater1')
onlyT_merged_scores = merge_lig_rec_scores(onlyT_iso_scores, onlyT_PD1_scores, 'onlyT')
onlyT_merged_greater1 = merge_lig_rec_scores(onlyT_iso_greater1, onlyT_PD1_greater1, 'onlyT', '_greater1')
# allT_merged_scores = merge_lig_rec_scores(allT_iso_scores, allT_PD1_scores, 'allT')
# allT_merged_greater1 = merge_lig_rec_scores(allT_iso_greater1, allT_PD1_greater1, 'allT', '_greater1')
# Calculate average LigXRec interactions between individual ligand and receptor clusters
# os.mkdir('LigXRec_outputs') # only need to run once
def ligxrec_cluster_average(adata_type, treatment):
scores_df = pd.read_csv(os.path.join('Iso_PD1_raw_gene_scores', adata_type+'_'+treatment+'_ligrec_scores.csv'))
clusters_df = pd.DataFrame(index=[cl for cl in cluster_names], columns=[cl for cl in cluster_names]) # index=ligands, cols=receptors
starttime = time.time()
for lig_cluster in tqdm(cluster_names):
print('Calculating Average LigXRec Scores for Ligand: ' + lig_cluster + '...')
for rec_cluster in cluster_names:
temp_df = scores_df[(scores_df['cellTypeLig'] == lig_cluster) & (scores_df['cellTypeRec'] == rec_cluster)].reset_index()
ligxrec_sum = 0
num_values = 0
for row in range(len(temp_df)): # Go thru each row of lig and rec clusters in file
if temp_df['cellTypeLig'][row] == temp_df['cellTypeRec'][row]:
continue # Ignore interactions between same clusters
if (temp_df['cellTypeLig'][row] == lig_cluster) and (temp_df['cellTypeRec'][row] == rec_cluster):
ligxrec_sum += temp_df['LigXRec'][row]
num_values += 1
try:
average_ligxrec = float(ligxrec_sum) / num_values
except ZeroDivisionError:
average_ligxrec = 0
clusters_df.loc[lig_cluster, rec_cluster] = average_ligxrec
clusters_df = clusters_df.replace(np.nan, 0)
print('FINISHED')
print(f'Total time elapsed: {time.time() - starttime} seconds')
clusters_df.to_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_'+adata_type+'_'+treatment+'.csv'))
return(clusters_df)
# Output DFs of average LigXRec values between cell locations and treatments and differences in values
fatT_average_ligxrec_iso = ligxrec_cluster_average('fatT', 'iso')
fatT_average_ligxrec_PD1 = ligxrec_cluster_average('fatT', 'PD1')
fatT_average_ligxrec_diff = fatT_average_ligxrec_PD1.sub(fatT_average_ligxrec_iso)
fatT_average_ligxrec_diff.to_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_fatT_diff.csv'))
fatC_average_ligxrec_iso = ligxrec_cluster_average('fatC', 'iso')
fatC_average_ligxrec_PD1 = ligxrec_cluster_average('fatC', 'PD1')
fatC_average_ligxrec_diff = fatC_average_ligxrec_PD1.sub(fatC_average_ligxrec_iso)
fatC_average_ligxrec_diff.to_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_fatC_diff.csv'))
onlyT_average_ligxrec_iso = ligxrec_cluster_average('onlyT', 'iso')
onlyT_average_ligxrec_PD1 = ligxrec_cluster_average('onlyT', 'PD1')
onlyT_average_ligxrec_diff = onlyT_average_ligxrec_PD1.sub(onlyT_average_ligxrec_iso)
onlyT_average_ligxrec_diff.to_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_onlyT_diff.csv'))
0%| | 0/36 [00:00<?, ?it/s]
Calculating Average LigXRec Scores for Ligand: tumor0...
3%|▎ | 1/36 [00:03<02:14, 3.83s/it]
Calculating Average LigXRec Scores for Ligand: endo1_T...
6%|▌ | 2/36 [00:07<02:08, 3.79s/it]
Calculating Average LigXRec Scores for Ligand: tumor2...
8%|▊ | 3/36 [00:10<01:53, 3.43s/it]
Calculating Average LigXRec Scores for Ligand: mac3_Arg1_T...
11%|█ | 4/36 [00:14<01:53, 3.54s/it]
Calculating Average LigXRec Scores for Ligand: endo4_F...
14%|█▍ | 5/36 [00:17<01:51, 3.58s/it]
Calculating Average LigXRec Scores for Ligand: mac5_F...
17%|█▋ | 6/36 [00:21<01:47, 3.59s/it]
Calculating Average LigXRec Scores for Ligand: mono6_T...
19%|█▉ | 7/36 [00:25<01:44, 3.60s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono7_T...
22%|██▏ | 8/36 [00:28<01:41, 3.61s/it]
Calculating Average LigXRec Scores for Ligand: DC8...
25%|██▌ | 9/36 [00:32<01:36, 3.59s/it]
Calculating Average LigXRec Scores for Ligand: fib9_IL33...
28%|██▊ | 10/36 [00:35<01:33, 3.58s/it]
Calculating Average LigXRec Scores for Ligand: myel10_Retnla_FatC...
31%|███ | 11/36 [00:39<01:29, 3.57s/it]
Calculating Average LigXRec Scores for Ligand: mac11_F...
33%|███▎ | 12/36 [00:43<01:26, 3.59s/it]
Calculating Average LigXRec Scores for Ligand: NK12...
36%|███▌ | 13/36 [00:46<01:22, 3.58s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono13_mix...
39%|███▉ | 14/36 [00:50<01:18, 3.57s/it]
Calculating Average LigXRec Scores for Ligand: fib14_Postn...
42%|████▏ | 15/36 [00:54<01:16, 3.66s/it]
Calculating Average LigXRec Scores for Ligand: T_cell15...
44%|████▍ | 16/36 [00:57<01:12, 3.63s/it]
Calculating Average LigXRec Scores for Ligand: endo16_T...
47%|████▋ | 17/36 [01:01<01:08, 3.58s/it]
Calculating Average LigXRec Scores for Ligand: tumor17...
50%|█████ | 18/36 [01:03<01:00, 3.37s/it]
Calculating Average LigXRec Scores for Ligand: fib_myel18...
53%|█████▎ | 19/36 [01:07<00:57, 3.38s/it]
Calculating Average LigXRec Scores for Ligand: peri19_T...
56%|█████▌ | 20/36 [01:10<00:54, 3.42s/it]
Calculating Average LigXRec Scores for Ligand: neutro20...
58%|█████▊ | 21/36 [01:14<00:51, 3.44s/it]
Calculating Average LigXRec Scores for Ligand: fib21_nonU...
61%|██████ | 22/36 [01:17<00:48, 3.43s/it]
Calculating Average LigXRec Scores for Ligand: tumor22...
64%|██████▍ | 23/36 [01:20<00:42, 3.28s/it]
Calculating Average LigXRec Scores for Ligand: fib23_T...
67%|██████▋ | 24/36 [01:24<00:39, 3.33s/it]
Calculating Average LigXRec Scores for Ligand: B_cell24...
69%|██████▉ | 25/36 [01:27<00:37, 3.40s/it]
Calculating Average LigXRec Scores for Ligand: tumor25...
72%|███████▏ | 26/36 [01:30<00:32, 3.24s/it]
Calculating Average LigXRec Scores for Ligand: myel26_poorQ...
75%|███████▌ | 27/36 [01:34<00:29, 3.30s/it]
Calculating Average LigXRec Scores for Ligand: fib27_Gpx3...
78%|███████▊ | 28/36 [01:37<00:26, 3.34s/it]
Calculating Average LigXRec Scores for Ligand: migDC28...
81%|████████ | 29/36 [01:40<00:23, 3.38s/it]
Calculating Average LigXRec Scores for Ligand: peri_endo29_F...
83%|████████▎ | 30/36 [01:44<00:20, 3.40s/it]
Calculating Average LigXRec Scores for Ligand: prol_myel30...
86%|████████▌ | 31/36 [01:47<00:17, 3.45s/it]
Calculating Average LigXRec Scores for Ligand: mast_baso31...
89%|████████▉ | 32/36 [01:51<00:13, 3.47s/it]
Calculating Average LigXRec Scores for Ligand: tumor32...
92%|█████████▏| 33/36 [01:54<00:09, 3.28s/it]
Calculating Average LigXRec Scores for Ligand: pDC33...
94%|█████████▍| 34/36 [01:57<00:06, 3.35s/it]
Calculating Average LigXRec Scores for Ligand: endo34_mix...
97%|█████████▋| 35/36 [02:01<00:03, 3.37s/it]
Calculating Average LigXRec Scores for Ligand: eos35...
100%|██████████| 36/36 [02:04<00:00, 3.47s/it]
FINISHED Total time elapsed: 124.87108778953552 seconds
0%| | 0/36 [00:00<?, ?it/s]
Calculating Average LigXRec Scores for Ligand: tumor0...
3%|▎ | 1/36 [00:03<02:14, 3.84s/it]
Calculating Average LigXRec Scores for Ligand: endo1_T...
6%|▌ | 2/36 [00:07<02:10, 3.85s/it]
Calculating Average LigXRec Scores for Ligand: tumor2...
8%|▊ | 3/36 [00:11<02:06, 3.83s/it]
Calculating Average LigXRec Scores for Ligand: mac3_Arg1_T...
11%|█ | 4/36 [00:15<02:03, 3.86s/it]
Calculating Average LigXRec Scores for Ligand: endo4_F...
14%|█▍ | 5/36 [00:19<01:58, 3.83s/it]
Calculating Average LigXRec Scores for Ligand: mac5_F...
17%|█▋ | 6/36 [00:22<01:53, 3.78s/it]
Calculating Average LigXRec Scores for Ligand: mono6_T...
19%|█▉ | 7/36 [00:26<01:50, 3.81s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono7_T...
22%|██▏ | 8/36 [00:30<01:46, 3.79s/it]
Calculating Average LigXRec Scores for Ligand: DC8...
25%|██▌ | 9/36 [00:34<01:41, 3.78s/it]
Calculating Average LigXRec Scores for Ligand: fib9_IL33...
28%|██▊ | 10/36 [00:37<01:37, 3.74s/it]
Calculating Average LigXRec Scores for Ligand: myel10_Retnla_FatC...
31%|███ | 11/36 [00:41<01:33, 3.73s/it]
Calculating Average LigXRec Scores for Ligand: mac11_F...
33%|███▎ | 12/36 [00:45<01:30, 3.77s/it]
Calculating Average LigXRec Scores for Ligand: NK12...
36%|███▌ | 13/36 [00:49<01:26, 3.77s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono13_mix...
39%|███▉ | 14/36 [00:52<01:22, 3.74s/it]
Calculating Average LigXRec Scores for Ligand: fib14_Postn...
42%|████▏ | 15/36 [00:56<01:18, 3.73s/it]
Calculating Average LigXRec Scores for Ligand: T_cell15...
44%|████▍ | 16/36 [01:00<01:14, 3.73s/it]
Calculating Average LigXRec Scores for Ligand: endo16_T...
47%|████▋ | 17/36 [01:04<01:10, 3.73s/it]
Calculating Average LigXRec Scores for Ligand: tumor17...
50%|█████ | 18/36 [01:07<01:03, 3.54s/it]
Calculating Average LigXRec Scores for Ligand: fib_myel18...
53%|█████▎ | 19/36 [01:10<01:01, 3.60s/it]
Calculating Average LigXRec Scores for Ligand: peri19_T...
56%|█████▌ | 20/36 [01:14<00:58, 3.66s/it]
Calculating Average LigXRec Scores for Ligand: neutro20...
58%|█████▊ | 21/36 [01:18<00:55, 3.67s/it]
Calculating Average LigXRec Scores for Ligand: fib21_nonU...
61%|██████ | 22/36 [01:22<00:51, 3.67s/it]
Calculating Average LigXRec Scores for Ligand: tumor22...
64%|██████▍ | 23/36 [01:25<00:45, 3.51s/it]
Calculating Average LigXRec Scores for Ligand: fib23_T...
67%|██████▋ | 24/36 [01:28<00:43, 3.59s/it]
Calculating Average LigXRec Scores for Ligand: B_cell24...
69%|██████▉ | 25/36 [01:32<00:40, 3.67s/it]
Calculating Average LigXRec Scores for Ligand: tumor25...
72%|███████▏ | 26/36 [01:35<00:34, 3.48s/it]
Calculating Average LigXRec Scores for Ligand: myel26_poorQ...
75%|███████▌ | 27/36 [01:39<00:32, 3.57s/it]
Calculating Average LigXRec Scores for Ligand: fib27_Gpx3...
78%|███████▊ | 28/36 [01:43<00:28, 3.61s/it]
Calculating Average LigXRec Scores for Ligand: migDC28...
81%|████████ | 29/36 [01:47<00:25, 3.68s/it]
Calculating Average LigXRec Scores for Ligand: peri_endo29_F...
83%|████████▎ | 30/36 [01:50<00:22, 3.69s/it]
Calculating Average LigXRec Scores for Ligand: prol_myel30...
86%|████████▌ | 31/36 [01:54<00:18, 3.69s/it]
Calculating Average LigXRec Scores for Ligand: mast_baso31...
89%|████████▉ | 32/36 [01:58<00:14, 3.74s/it]
Calculating Average LigXRec Scores for Ligand: tumor32...
92%|█████████▏| 33/36 [02:01<00:10, 3.57s/it]
Calculating Average LigXRec Scores for Ligand: pDC33...
94%|█████████▍| 34/36 [02:05<00:07, 3.62s/it]
Calculating Average LigXRec Scores for Ligand: endo34_mix...
97%|█████████▋| 35/36 [02:09<00:03, 3.69s/it]
Calculating Average LigXRec Scores for Ligand: eos35...
100%|██████████| 36/36 [02:13<00:00, 3.70s/it]
FINISHED Total time elapsed: 133.06602215766907 seconds
0%| | 0/36 [00:00<?, ?it/s]
Calculating Average LigXRec Scores for Ligand: tumor0...
3%|▎ | 1/36 [00:03<02:07, 3.64s/it]
Calculating Average LigXRec Scores for Ligand: endo1_T...
6%|▌ | 2/36 [00:07<02:03, 3.63s/it]
Calculating Average LigXRec Scores for Ligand: tumor2...
8%|▊ | 3/36 [00:10<01:50, 3.35s/it]
Calculating Average LigXRec Scores for Ligand: mac3_Arg1_T...
11%|█ | 4/36 [00:14<01:52, 3.51s/it]
Calculating Average LigXRec Scores for Ligand: endo4_F...
14%|█▍ | 5/36 [00:17<01:50, 3.56s/it]
Calculating Average LigXRec Scores for Ligand: mac5_F...
17%|█▋ | 6/36 [00:21<01:45, 3.53s/it]
Calculating Average LigXRec Scores for Ligand: mono6_T...
19%|█▉ | 7/36 [00:24<01:42, 3.55s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono7_T...
22%|██▏ | 8/36 [00:28<01:41, 3.63s/it]
Calculating Average LigXRec Scores for Ligand: DC8...
25%|██▌ | 9/36 [00:32<01:37, 3.60s/it]
Calculating Average LigXRec Scores for Ligand: fib9_IL33...
28%|██▊ | 10/36 [00:35<01:32, 3.56s/it]
Calculating Average LigXRec Scores for Ligand: myel10_Retnla_FatC...
31%|███ | 11/36 [00:38<01:27, 3.51s/it]
Calculating Average LigXRec Scores for Ligand: mac11_F...
33%|███▎ | 12/36 [00:42<01:24, 3.53s/it]
Calculating Average LigXRec Scores for Ligand: NK12...
36%|███▌ | 13/36 [00:46<01:20, 3.52s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono13_mix...
39%|███▉ | 14/36 [00:49<01:17, 3.51s/it]
Calculating Average LigXRec Scores for Ligand: fib14_Postn...
42%|████▏ | 15/36 [00:52<01:13, 3.51s/it]
Calculating Average LigXRec Scores for Ligand: T_cell15...
44%|████▍ | 16/36 [00:56<01:11, 3.56s/it]
Calculating Average LigXRec Scores for Ligand: endo16_T...
47%|████▋ | 17/36 [01:00<01:07, 3.54s/it]
Calculating Average LigXRec Scores for Ligand: tumor17...
50%|█████ | 18/36 [01:03<01:00, 3.34s/it]
Calculating Average LigXRec Scores for Ligand: fib_myel18...
53%|█████▎ | 19/36 [01:06<00:58, 3.43s/it]
Calculating Average LigXRec Scores for Ligand: peri19_T...
56%|█████▌ | 20/36 [01:10<00:56, 3.54s/it]
Calculating Average LigXRec Scores for Ligand: neutro20...
58%|█████▊ | 21/36 [01:14<00:53, 3.55s/it]
Calculating Average LigXRec Scores for Ligand: fib21_nonU...
61%|██████ | 22/36 [01:17<00:49, 3.52s/it]
Calculating Average LigXRec Scores for Ligand: tumor22...
64%|██████▍ | 23/36 [01:20<00:43, 3.37s/it]
Calculating Average LigXRec Scores for Ligand: fib23_T...
67%|██████▋ | 24/36 [01:24<00:41, 3.49s/it]
Calculating Average LigXRec Scores for Ligand: B_cell24...
69%|██████▉ | 25/36 [01:27<00:39, 3.55s/it]
Calculating Average LigXRec Scores for Ligand: tumor25...
72%|███████▏ | 26/36 [01:30<00:33, 3.37s/it]
Calculating Average LigXRec Scores for Ligand: myel26_poorQ...
75%|███████▌ | 27/36 [01:34<00:30, 3.41s/it]
Calculating Average LigXRec Scores for Ligand: fib27_Gpx3...
78%|███████▊ | 28/36 [01:37<00:27, 3.43s/it]
Calculating Average LigXRec Scores for Ligand: migDC28...
81%|████████ | 29/36 [01:41<00:24, 3.47s/it]
Calculating Average LigXRec Scores for Ligand: peri_endo29_F...
83%|████████▎ | 30/36 [01:44<00:20, 3.47s/it]
Calculating Average LigXRec Scores for Ligand: prol_myel30...
86%|████████▌ | 31/36 [01:48<00:17, 3.51s/it]
Calculating Average LigXRec Scores for Ligand: mast_baso31...
89%|████████▉ | 32/36 [01:52<00:14, 3.52s/it]
Calculating Average LigXRec Scores for Ligand: tumor32...
92%|█████████▏| 33/36 [01:55<00:10, 3.36s/it]
Calculating Average LigXRec Scores for Ligand: pDC33...
94%|█████████▍| 34/36 [01:58<00:07, 3.52s/it]
Calculating Average LigXRec Scores for Ligand: endo34_mix...
97%|█████████▋| 35/36 [02:03<00:03, 3.79s/it]
Calculating Average LigXRec Scores for Ligand: eos35...
100%|██████████| 36/36 [02:07<00:00, 3.53s/it]
FINISHED Total time elapsed: 127.22055077552795 seconds
0%| | 0/36 [00:00<?, ?it/s]
Calculating Average LigXRec Scores for Ligand: tumor0...
3%|▎ | 1/36 [00:04<02:46, 4.75s/it]
Calculating Average LigXRec Scores for Ligand: endo1_T...
6%|▌ | 2/36 [00:09<02:39, 4.69s/it]
Calculating Average LigXRec Scores for Ligand: tumor2...
8%|▊ | 3/36 [00:13<02:31, 4.60s/it]
Calculating Average LigXRec Scores for Ligand: mac3_Arg1_T...
11%|█ | 4/36 [00:18<02:27, 4.61s/it]
Calculating Average LigXRec Scores for Ligand: endo4_F...
14%|█▍ | 5/36 [00:23<02:23, 4.63s/it]
Calculating Average LigXRec Scores for Ligand: mac5_F...
17%|█▋ | 6/36 [00:27<02:18, 4.63s/it]
Calculating Average LigXRec Scores for Ligand: mono6_T...
19%|█▉ | 7/36 [00:32<02:14, 4.64s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono7_T...
22%|██▏ | 8/36 [00:37<02:09, 4.61s/it]
Calculating Average LigXRec Scores for Ligand: DC8...
25%|██▌ | 9/36 [00:41<02:03, 4.59s/it]
Calculating Average LigXRec Scores for Ligand: fib9_IL33...
28%|██▊ | 10/36 [00:46<01:58, 4.57s/it]
Calculating Average LigXRec Scores for Ligand: myel10_Retnla_FatC...
31%|███ | 11/36 [00:50<01:53, 4.55s/it]
Calculating Average LigXRec Scores for Ligand: mac11_F...
33%|███▎ | 12/36 [00:55<01:49, 4.57s/it]
Calculating Average LigXRec Scores for Ligand: NK12...
36%|███▌ | 13/36 [00:59<01:44, 4.56s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono13_mix...
39%|███▉ | 14/36 [01:04<01:40, 4.56s/it]
Calculating Average LigXRec Scores for Ligand: fib14_Postn...
42%|████▏ | 15/36 [01:08<01:35, 4.54s/it]
Calculating Average LigXRec Scores for Ligand: T_cell15...
44%|████▍ | 16/36 [01:13<01:30, 4.53s/it]
Calculating Average LigXRec Scores for Ligand: endo16_T...
47%|████▋ | 17/36 [01:17<01:26, 4.58s/it]
Calculating Average LigXRec Scores for Ligand: tumor17...
50%|█████ | 18/36 [01:22<01:23, 4.62s/it]
Calculating Average LigXRec Scores for Ligand: fib_myel18...
53%|█████▎ | 19/36 [01:27<01:18, 4.60s/it]
Calculating Average LigXRec Scores for Ligand: peri19_T...
56%|█████▌ | 20/36 [01:31<01:12, 4.55s/it]
Calculating Average LigXRec Scores for Ligand: neutro20...
58%|█████▊ | 21/36 [01:36<01:08, 4.57s/it]
Calculating Average LigXRec Scores for Ligand: fib21_nonU...
61%|██████ | 22/36 [01:40<01:03, 4.53s/it]
Calculating Average LigXRec Scores for Ligand: tumor22...
64%|██████▍ | 23/36 [01:45<00:59, 4.59s/it]
Calculating Average LigXRec Scores for Ligand: fib23_T...
67%|██████▋ | 24/36 [01:50<00:55, 4.60s/it]
Calculating Average LigXRec Scores for Ligand: B_cell24...
69%|██████▉ | 25/36 [01:54<00:50, 4.59s/it]
Calculating Average LigXRec Scores for Ligand: tumor25...
72%|███████▏ | 26/36 [01:59<00:46, 4.61s/it]
Calculating Average LigXRec Scores for Ligand: myel26_poorQ...
75%|███████▌ | 27/36 [02:03<00:41, 4.56s/it]
Calculating Average LigXRec Scores for Ligand: fib27_Gpx3...
78%|███████▊ | 28/36 [02:08<00:36, 4.53s/it]
Calculating Average LigXRec Scores for Ligand: migDC28...
81%|████████ | 29/36 [02:12<00:31, 4.54s/it]
Calculating Average LigXRec Scores for Ligand: peri_endo29_F...
83%|████████▎ | 30/36 [02:17<00:27, 4.52s/it]
Calculating Average LigXRec Scores for Ligand: prol_myel30...
86%|████████▌ | 31/36 [02:21<00:22, 4.53s/it]
Calculating Average LigXRec Scores for Ligand: mast_baso31...
89%|████████▉ | 32/36 [02:26<00:18, 4.56s/it]
Calculating Average LigXRec Scores for Ligand: tumor32...
92%|█████████▏| 33/36 [02:30<00:13, 4.35s/it]
Calculating Average LigXRec Scores for Ligand: pDC33...
94%|█████████▍| 34/36 [02:34<00:08, 4.42s/it]
Calculating Average LigXRec Scores for Ligand: endo34_mix...
97%|█████████▋| 35/36 [02:39<00:04, 4.44s/it]
Calculating Average LigXRec Scores for Ligand: eos35...
100%|██████████| 36/36 [02:43<00:00, 4.55s/it]
FINISHED Total time elapsed: 163.92462801933289 seconds
0%| | 0/36 [00:00<?, ?it/s]
Calculating Average LigXRec Scores for Ligand: tumor0...
3%|▎ | 1/36 [00:04<02:48, 4.81s/it]
Calculating Average LigXRec Scores for Ligand: endo1_T...
6%|▌ | 2/36 [00:09<02:43, 4.80s/it]
Calculating Average LigXRec Scores for Ligand: tumor2...
8%|▊ | 3/36 [00:14<02:36, 4.73s/it]
Calculating Average LigXRec Scores for Ligand: mac3_Arg1_T...
11%|█ | 4/36 [00:19<02:32, 4.78s/it]
Calculating Average LigXRec Scores for Ligand: endo4_F...
14%|█▍ | 5/36 [00:23<02:28, 4.79s/it]
Calculating Average LigXRec Scores for Ligand: mac5_F...
17%|█▋ | 6/36 [00:28<02:21, 4.73s/it]
Calculating Average LigXRec Scores for Ligand: mono6_T...
19%|█▉ | 7/36 [00:34<02:24, 4.99s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono7_T...
22%|██▏ | 8/36 [00:38<02:18, 4.95s/it]
Calculating Average LigXRec Scores for Ligand: DC8...
25%|██▌ | 9/36 [00:43<02:13, 4.96s/it]
Calculating Average LigXRec Scores for Ligand: fib9_IL33...
28%|██▊ | 10/36 [00:48<02:05, 4.84s/it]
Calculating Average LigXRec Scores for Ligand: myel10_Retnla_FatC...
31%|███ | 11/36 [00:53<02:01, 4.85s/it]
Calculating Average LigXRec Scores for Ligand: mac11_F...
33%|███▎ | 12/36 [00:58<01:57, 4.88s/it]
Calculating Average LigXRec Scores for Ligand: NK12...
36%|███▌ | 13/36 [01:03<01:52, 4.91s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono13_mix...
39%|███▉ | 14/36 [01:08<01:50, 5.01s/it]
Calculating Average LigXRec Scores for Ligand: fib14_Postn...
42%|████▏ | 15/36 [01:13<01:45, 5.02s/it]
Calculating Average LigXRec Scores for Ligand: T_cell15...
44%|████▍ | 16/36 [01:18<01:40, 5.01s/it]
Calculating Average LigXRec Scores for Ligand: endo16_T...
47%|████▋ | 17/36 [01:23<01:34, 4.97s/it]
Calculating Average LigXRec Scores for Ligand: tumor17...
50%|█████ | 18/36 [01:28<01:28, 4.90s/it]
Calculating Average LigXRec Scores for Ligand: fib_myel18...
53%|█████▎ | 19/36 [01:32<01:22, 4.83s/it]
Calculating Average LigXRec Scores for Ligand: peri19_T...
56%|█████▌ | 20/36 [01:37<01:16, 4.79s/it]
Calculating Average LigXRec Scores for Ligand: neutro20...
58%|█████▊ | 21/36 [01:42<01:12, 4.80s/it]
Calculating Average LigXRec Scores for Ligand: fib21_nonU...
61%|██████ | 22/36 [01:46<01:06, 4.75s/it]
Calculating Average LigXRec Scores for Ligand: tumor22...
64%|██████▍ | 23/36 [01:51<01:02, 4.77s/it]
Calculating Average LigXRec Scores for Ligand: fib23_T...
67%|██████▋ | 24/36 [01:56<00:56, 4.71s/it]
Calculating Average LigXRec Scores for Ligand: B_cell24...
69%|██████▉ | 25/36 [02:01<00:52, 4.75s/it]
Calculating Average LigXRec Scores for Ligand: tumor25...
72%|███████▏ | 26/36 [02:05<00:47, 4.75s/it]
Calculating Average LigXRec Scores for Ligand: myel26_poorQ...
75%|███████▌ | 27/36 [02:10<00:42, 4.73s/it]
Calculating Average LigXRec Scores for Ligand: fib27_Gpx3...
78%|███████▊ | 28/36 [02:15<00:37, 4.68s/it]
Calculating Average LigXRec Scores for Ligand: migDC28...
81%|████████ | 29/36 [02:20<00:33, 4.72s/it]
Calculating Average LigXRec Scores for Ligand: peri_endo29_F...
83%|████████▎ | 30/36 [02:24<00:28, 4.69s/it]
Calculating Average LigXRec Scores for Ligand: prol_myel30...
86%|████████▌ | 31/36 [02:29<00:23, 4.71s/it]
Calculating Average LigXRec Scores for Ligand: mast_baso31...
89%|████████▉ | 32/36 [02:34<00:19, 4.80s/it]
Calculating Average LigXRec Scores for Ligand: tumor32...
92%|█████████▏| 33/36 [02:39<00:14, 4.80s/it]
Calculating Average LigXRec Scores for Ligand: pDC33...
94%|█████████▍| 34/36 [02:43<00:09, 4.78s/it]
Calculating Average LigXRec Scores for Ligand: endo34_mix...
97%|█████████▋| 35/36 [02:48<00:04, 4.73s/it]
Calculating Average LigXRec Scores for Ligand: eos35...
100%|██████████| 36/36 [02:53<00:00, 4.82s/it]
FINISHED Total time elapsed: 173.42154598236084 seconds
0%| | 0/36 [00:00<?, ?it/s]
Calculating Average LigXRec Scores for Ligand: tumor0...
3%|▎ | 1/36 [00:04<02:39, 4.54s/it]
Calculating Average LigXRec Scores for Ligand: endo1_T...
6%|▌ | 2/36 [00:09<02:36, 4.59s/it]
Calculating Average LigXRec Scores for Ligand: tumor2...
8%|▊ | 3/36 [00:13<02:28, 4.51s/it]
Calculating Average LigXRec Scores for Ligand: mac3_Arg1_T...
11%|█ | 4/36 [00:18<02:23, 4.49s/it]
Calculating Average LigXRec Scores for Ligand: endo4_F...
14%|█▍ | 5/36 [00:22<02:19, 4.49s/it]
Calculating Average LigXRec Scores for Ligand: mac5_F...
17%|█▋ | 6/36 [00:27<02:14, 4.48s/it]
Calculating Average LigXRec Scores for Ligand: mono6_T...
19%|█▉ | 7/36 [00:31<02:09, 4.48s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono7_T...
22%|██▏ | 8/36 [00:35<02:05, 4.48s/it]
Calculating Average LigXRec Scores for Ligand: DC8...
25%|██▌ | 9/36 [00:40<02:01, 4.49s/it]
Calculating Average LigXRec Scores for Ligand: fib9_IL33...
28%|██▊ | 10/36 [00:44<01:55, 4.43s/it]
Calculating Average LigXRec Scores for Ligand: myel10_Retnla_FatC...
31%|███ | 11/36 [00:49<01:50, 4.44s/it]
Calculating Average LigXRec Scores for Ligand: mac11_F...
33%|███▎ | 12/36 [00:53<01:47, 4.47s/it]
Calculating Average LigXRec Scores for Ligand: NK12...
36%|███▌ | 13/36 [00:58<01:42, 4.47s/it]
Calculating Average LigXRec Scores for Ligand: mac_mono13_mix...
39%|███▉ | 14/36 [01:02<01:37, 4.42s/it]
Calculating Average LigXRec Scores for Ligand: fib14_Postn...
42%|████▏ | 15/36 [01:06<01:32, 4.40s/it]
Calculating Average LigXRec Scores for Ligand: T_cell15...
44%|████▍ | 16/36 [01:11<01:28, 4.41s/it]
Calculating Average LigXRec Scores for Ligand: endo16_T...
47%|████▋ | 17/36 [01:15<01:23, 4.41s/it]
Calculating Average LigXRec Scores for Ligand: tumor17...
50%|█████ | 18/36 [01:20<01:19, 4.43s/it]
Calculating Average LigXRec Scores for Ligand: fib_myel18...
53%|█████▎ | 19/36 [01:24<01:16, 4.51s/it]
Calculating Average LigXRec Scores for Ligand: peri19_T...
56%|█████▌ | 20/36 [01:29<01:12, 4.54s/it]
Calculating Average LigXRec Scores for Ligand: neutro20...
58%|█████▊ | 21/36 [01:33<01:07, 4.52s/it]
Calculating Average LigXRec Scores for Ligand: fib21_nonU...
61%|██████ | 22/36 [01:38<01:02, 4.49s/it]
Calculating Average LigXRec Scores for Ligand: tumor22...
64%|██████▍ | 23/36 [01:43<00:58, 4.53s/it]
Calculating Average LigXRec Scores for Ligand: fib23_T...
67%|██████▋ | 24/36 [01:47<00:54, 4.53s/it]
Calculating Average LigXRec Scores for Ligand: B_cell24...
69%|██████▉ | 25/36 [01:52<00:49, 4.54s/it]
Calculating Average LigXRec Scores for Ligand: tumor25...
72%|███████▏ | 26/36 [01:56<00:45, 4.54s/it]
Calculating Average LigXRec Scores for Ligand: myel26_poorQ...
75%|███████▌ | 27/36 [02:00<00:40, 4.48s/it]
Calculating Average LigXRec Scores for Ligand: fib27_Gpx3...
78%|███████▊ | 28/36 [02:05<00:35, 4.44s/it]
Calculating Average LigXRec Scores for Ligand: migDC28...
81%|████████ | 29/36 [02:09<00:31, 4.49s/it]
Calculating Average LigXRec Scores for Ligand: peri_endo29_F...
83%|████████▎ | 30/36 [02:14<00:26, 4.44s/it]
Calculating Average LigXRec Scores for Ligand: prol_myel30...
86%|████████▌ | 31/36 [02:18<00:22, 4.46s/it]
Calculating Average LigXRec Scores for Ligand: mast_baso31...
89%|████████▉ | 32/36 [02:23<00:18, 4.51s/it]
Calculating Average LigXRec Scores for Ligand: tumor32...
92%|█████████▏| 33/36 [02:28<00:13, 4.57s/it]
Calculating Average LigXRec Scores for Ligand: pDC33...
94%|█████████▍| 34/36 [02:32<00:09, 4.56s/it]
Calculating Average LigXRec Scores for Ligand: endo34_mix...
97%|█████████▋| 35/36 [02:36<00:04, 4.33s/it]
Calculating Average LigXRec Scores for Ligand: eos35...
100%|██████████| 36/36 [02:41<00:00, 4.48s/it]
FINISHED Total time elapsed: 161.2432508468628 seconds
# Log2(Fold change) of average LigXRec of clusters
def log2fc_ligxrec_averages(ligxrec_PD1, ligxrec_iso, adata_type):
logfc_df = pd.DataFrame(index=[cl for cl in cluster_names], columns=[cl for cl in cluster_names]) #index=ligand, col=receptor
for lig in cluster_names:
for rec in cluster_names:
if (ligxrec_PD1.loc[lig,rec] <= 0) or (ligxrec_iso.loc[lig,rec] <= 0):
logfc_df.loc[lig,rec] = 0
else:
fc = ligxrec_PD1.loc[lig,rec] / ligxrec_iso.loc[lig,rec]
logfc_df.loc[lig,rec] = math.log2(fc)
logfc_df.to_csv(os.path.join('LigXRec_outputs', 'ligxrec_clusters_averages_'+adata_type+'_log2fc.csv'))
return(logfc_df)
fatT_average_ligxrec_logfc = log2fc_ligxrec_averages(fatT_average_ligxrec_PD1, fatT_average_ligxrec_iso, 'fatT')
fatC_average_ligxrec_logfc = log2fc_ligxrec_averages(fatC_average_ligxrec_PD1, fatC_average_ligxrec_iso, 'fatC')
onlyT_average_ligxrec_logfc = log2fc_ligxrec_averages(onlyT_average_ligxrec_PD1, onlyT_average_ligxrec_iso, 'onlyT')
# Read in .csv files to save time
fatT_average_ligxrec_iso = pd.read_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_fatT_iso.csv'), index_col=[0])
fatT_average_ligxrec_PD1 = pd.read_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_fatT_PD1.csv'), index_col=[0])
fatT_average_ligxrec_diff = pd.read_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_fatT_diff.csv'), index_col=[0])
fatC_average_ligxrec_iso = pd.read_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_fatC_iso.csv'), index_col=[0])
fatC_average_ligxrec_PD1 = pd.read_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_fatC_PD1.csv'), index_col=[0])
fatC_average_ligxrec_diff = pd.read_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_fatC_diff.csv'), index_col=[0])
onlyT_average_ligxrec_iso = pd.read_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_onlyT_iso.csv'), index_col=[0])
onlyT_average_ligxrec_PD1 = pd.read_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_onlyT_PD1.csv'), index_col=[0])
onlyT_average_ligxrec_diff = pd.read_csv(os.path.join('LigXRec_outputs', 'ligxrec_cluster_averages_onlyT_diff.csv'), index_col=[0])
# DataFrame for outputting top/bottom LigXRec values between clusters
def top_bottom_ligxrec_averages(ligxrec_cluster_average, num, adata_tye_treatment, analysis):
top_bottom_values = []
for lig in ligxrec_cluster_average.index:
for rec in ligxrec_cluster_average.columns:
entry = [lig, rec, ligxrec_cluster_average.loc[lig,rec]]
top_bottom_values.append(entry)
df = pd.DataFrame(data=top_bottom_values, columns=['Ligand', 'Receptor', analysis])
df = df.sort_values(by=[analysis], ascending=False)
top_df = df[:num]
bottom_df = df[-num:]
top_bottom_df = pd.concat([top_df, bottom_df])
top_bottom_df.to_csv(os.path.join('LigXRec_outputs', 'top_bottom'+str(num)+'_ligxrec_cluster_averages_'+adata_tye_treatment+'.csv'), index=False)
return(top_bottom_df)
top_bottom5_fatT_diff = top_bottom_ligxrec_averages(fatT_average_ligxrec_diff, 5, 'fatT_diff', 'LigXRec Diff')
top_bottom5_fatC_diff = top_bottom_ligxrec_averages(fatC_average_ligxrec_diff, 5, 'fatC_diff', 'LigXRec Diff')
top_bottom5_onlyT_diff = top_bottom_ligxrec_averages(onlyT_average_ligxrec_diff, 5, 'onlyT_diff', 'LigXRec Diff')
top_bottom5_fatT_logfc = top_bottom_ligxrec_averages(fatT_average_ligxrec_logfc, 5, 'fatT_log2fc', 'Log2(FC)')
top_bottom5_fatC_logfc = top_bottom_ligxrec_averages(fatC_average_ligxrec_logfc, 5, 'fatC_log2fc', 'Log2(FC)')
top_bottom5_onlyT_logfc = top_bottom_ligxrec_averages(onlyT_average_ligxrec_logfc, 5, 'onlyT_log2fc', 'Log2(FC)')
# Matrixplot of LigXRec averages between clusters
def ligxrec_cluster_average_plot(adata_type_treat_df, color, adata_type, treatment, color_center=None):
data = adata_type_treat_df.replace(np.nan, 0)
data = data.astype(float)
data_np = data.to_numpy()
f, ax = plt.subplots(figsize=(20,20))
ax = sns.heatmap(data_np, cmap=color, center=color_center, cbar=True, linewidths=0.5, linecolor='gray', square=True, cbar_kws={'shrink': 0.5})
ax.set_ylabel('Ligand Cluster', fontstyle='oblique', fontsize=20)
ax.set_xlabel('Receptor Cluster', fontstyle='oblique', fontsize=20)
ax.set_xticklabels(adata_type_treat_df.columns)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
ax.set_yticklabels(adata_type_treat_df.index, rotation=0)
ax.set_title('Average LigXRec Interactions between Clusters ('+adata_type+', '+ treatment+')', fontweight='bold', fontsize=30)
plt.savefig(os.path.join('figures', 'matrix_average_LigXRec_cluster_interactions_'+adata_type+'_'+treatment+'.pdf'))
return ax
# FatT LigXRec matrixplots
ligxrec_cluster_average_plot(fatT_average_ligxrec_iso, 'YlOrRd', 'FatT', 'Isotype')
ligxrec_cluster_average_plot(fatT_average_ligxrec_PD1, 'YlOrRd', 'FatT', 'aPD-1')
ligxrec_cluster_average_plot(fatT_average_ligxrec_diff, 'bwr', 'FatT', 'Difference', 0)
ligxrec_cluster_average_plot(fatT_average_ligxrec_logfc, 'bwr', 'FatT', 'Log2(FC)', 0)
<AxesSubplot:title={'center':'Average LigXRec Interactions between Clusters (FatT, Log2(FC))'}, xlabel='Receptor Cluster', ylabel='Ligand Cluster'>
# FatC LigXRec matrixplots
ligxrec_cluster_average_plot(fatC_average_ligxrec_iso, 'YlOrRd', 'FatC', 'Isotype')
ligxrec_cluster_average_plot(fatC_average_ligxrec_PD1, 'YlOrRd', 'FatC', 'aPD-1')
ligxrec_cluster_average_plot(fatC_average_ligxrec_diff, 'bwr', 'FatC', 'Difference', 0)
ligxrec_cluster_average_plot(fatC_average_ligxrec_logfc, 'bwr', 'FatC', 'Log2(FC)', 0)
<AxesSubplot:title={'center':'Average LigXRec Interactions between Clusters (FatC, Log2(FC))'}, xlabel='Receptor Cluster', ylabel='Ligand Cluster'>
# OnlyT LigXRec matrixplots
ligxrec_cluster_average_plot(onlyT_average_ligxrec_iso, 'YlOrRd', 'OnlyT', 'Isotype')
ligxrec_cluster_average_plot(onlyT_average_ligxrec_PD1, 'YlOrRd', 'OnlyT', 'aPD-1')
ligxrec_cluster_average_plot(onlyT_average_ligxrec_diff, 'bwr', 'OnlyT', 'Difference', 0)
ligxrec_cluster_average_plot(onlyT_average_ligxrec_logfc, 'bwr', 'OnlyT', 'Log2(FC)', 0)
<AxesSubplot:title={'center':'Average LigXRec Interactions between Clusters (OnlyT, Log2(FC))'}, xlabel='Receptor Cluster', ylabel='Ligand Cluster'>
# Matrixplot for top/bottom LigXRec differences with PD1 treatment
def top_LigXRec_diff_plot(num, adata_type_merge_df, adata_type, title_filter='', filter_type=''):
# Top/Bottom LigXRec differences - specify num of differrences (ex. 10 highest and 10 lowest LigXRec differences)
top_scores = adata_type_merge_df[:num]
bottom_scores = adata_type_merge_df[-num:]
# Make top/bottom lig-rec cell/gene pairs DF and numpy array
top_bottom_scores = pd.concat([top_scores, bottom_scores], ignore_index=True)
top_bottom_scores.to_csv(os.path.join('LigXRec_outputs', 'top_bottom'+str(num)+'_ligxrec_' + adata_type + '.csv'), index=False)
np_df = np.zeros(shape=(len(top_bottom_scores['Lig-Rec Gene Pairs'].unique()), len(top_bottom_scores['Lig-Rec Cell Pairs'].unique())))
top_bottom_scores_df = pd.DataFrame(np_df, columns=top_bottom_scores['Lig-Rec Cell Pairs'].unique(),
index=top_bottom_scores['Lig-Rec Gene Pairs'].unique())
# Fill top and bottom scores DF with values from input file
for gene in top_bottom_scores['Lig-Rec Gene Pairs']:
for cell in top_bottom_scores['Lig-Rec Cell Pairs']:
try:
top_bottom_scores_df.loc[gene, cell] = top_bottom_scores[(top_bottom_scores['Lig-Rec Gene Pairs']==gene) &
(top_bottom_scores['Lig-Rec Cell Pairs']==cell)].iloc[:,-1].item()
except ValueError:
top_bottom_scores_df.loc[gene, cell] = 0
top_bottom_scores_df.to_csv(os.path.join('LigXRec_outputs', 'top_bottom'+str(num)+'_ligxrec_' + adata_type + '_array.csv'))
top_bottom_scores_np = top_bottom_scores_df.to_numpy()
# Matrixplot of top up-/downregulated gene-gene pair LigXRec differences
f, ax = plt.subplots(figsize=(20,20))
ax = sns.heatmap(top_bottom_scores_np, cmap='bwr', center=0, cbar=True,
linewidths=0.5, linecolor='gray', square=True, cbar_kws={'shrink': 0.5})
ax.set_ylabel('Lig.-Rec. Gene Pairs', fontstyle='oblique', fontsize=20)
ax.set_xlabel('Lig.-Rec. Clusters', fontstyle='oblique', fontsize=20)
ax.set_xticklabels(top_bottom_scores_df.columns)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
ax.set_yticklabels(top_bottom_scores_df.index, rotation=0)
ax.set_title('Top ' + str(num) + ' Up-/Downregulated Gene Pairs w/ aPD-1 Treatment ('+ adata_type + title_filter +')',
fontweight='bold', fontsize=25)
plt.savefig(os.path.join('figures', 'matrix_topBottom'+str(num)+'_LigXRecDiff_'+ adata_type + filter_type +'.pdf'))
return ax
# FatT top 10 LigXRec differences matrixplot - DONE
top_LigXRec_diff_plot(10, fatT_merged_scores, 'FatT') # unfiltered
top_LigXRec_diff_plot(10, fatT_merged_scores, 'FatT', ', Greater1 Filter', '_greater1') # lig/rec scores greater1 filter
# FatC top 10 LigXRec differences matrixplot - DONE
top_LigXRec_diff_plot(10, fatC_merged_scores, 'FatC') # unfiltered
top_LigXRec_diff_plot(10, fatC_merged_scores, 'FatC', ', Greater1 Filter', '_greater1') # lig/rec scores greater1 filter
# # OnlyT top 10 LigXRec differences matrixplot - DONE
top_LigXRec_diff_plot(10, onlyT_merged_scores, 'OnlyT') # unfiltered
top_LigXRec_diff_plot(10, onlyT_merged_scores, 'OnlyT', ', Greater1 Filter', '_greater1') # lig/rec scores greater1 filter
# AllT top 10 LigXRec differences matrixplot
# top_LigXRec_diff_plot(10, allT_merged_scores, 'AllT') # unfiltered
# top_LigXRec_diff_plot(10, allT_merged_scores, 'AllT', ', Greater1 Filter', '_greater1') # lig/rec scores greater1 filter
<AxesSubplot:title={'center':'Top 10 Up-/Downregulated Gene Pairs w/ aPD-1 Treatment (OnlyT, Greater1 Filter)'}, xlabel='Lig.-Rec. Clusters', ylabel='Lig.-Rec. Gene Pairs'>
# Save adata sets of ligand and receptor gene score comparisons
adataFatT.write(os.path.join('adata_steps', 'expB_step10_fatT_ligxrec_differences.h5ad'))
adataFatC.write(os.path.join('adata_steps', 'expB_step10_fatC_ligxrec_differences.h5ad'))
adataOnlyT.write(os.path.join('adata_steps', 'expB_step10_onlyT_ligxrec_differences.h5ad'))
# adataAllT.write(os.path.join('adata_steps', 'expB_step10_allT_ligxrec_comparisons.h5ad'))
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
Each tissue subset (FatT, FatC, and OnlyT) was compared to each other at the genetic interaction level to distinguish each set. This includes determining gene pairs that are unique to each set and comparing LigXRec values within each tissue set by comparing gene pairs found in both sets. Calculations of the differences between these values allows for comparisons of the magnitude of change in ligand-receptor interaction due to anti-PD-1 treatment in each tissue.
# Make DFs and print lists of common and unique gene pairs of UNFILTERED Exp. B adataType sets (FatT-FatC, FatT-OnlyT, FatC-OnlyT)
def common_and_unique_genes_DFs(adata_type1_DF, adata_type2_DF, adata_type1, adata_type2, num):
# Print out common/unique gene pairs for each adataType
set_ligrec_genes_type1 = set(adata_type1_DF['Lig-Rec Gene Pairs'])
set_ligrec_genes_type2 = set(adata_type2_DF['Lig-Rec Gene Pairs'])
common_genes = (set_ligrec_genes_type1.intersection(set_ligrec_genes_type2))
unique_genes1 = (set_ligrec_genes_type1.difference(set_ligrec_genes_type2))
unique_genes2 = (set_ligrec_genes_type2.difference(set_ligrec_genes_type1))
print(str(len(common_genes)), 'common gene pairs between ' +adata_type1+ ' and ' +adata_type2, '\n') #+ ': ' + str(commonGenes))
print(str(len(unique_genes1)), 'gene pairs unique to '+adata_type1+ ': ' + str(unique_genes1), '\n')
print(str(len(unique_genes2)), 'gene pairs unique to '+adata_type2+ ': ' + str(unique_genes2), '\n')
# Edit, merge DFs
adata_type1_DF_edit = adata_type1_DF.drop(columns=['LigGeneID', 'RecGeneID', 'cellTypeLig', 'cellTypeRec', 'LigXRec_Iso', 'Lig Score_Iso',
'Rec Score_Iso', 'LigXRec_PD1', 'Lig Score_PD1', 'Rec Score_PD1'])
adata_type2_DF_edit = adata_type2_DF.drop(columns=['LigGeneID', 'RecGeneID', 'cellTypeLig', 'cellTypeRec', 'LigXRec_Iso', 'Lig Score_Iso',
'Rec Score_Iso', 'LigXRec_PD1', 'Lig Score_PD1', 'Rec Score_PD1'])
adata_merge_DF = adata_type1_DF_edit.merge(adata_type2_DF_edit,
left_on=['Lig-Rec Gene Pairs', 'Lig-Rec Cell Pairs'],
right_on=['Lig-Rec Gene Pairs', 'Lig-Rec Cell Pairs'],
suffixes=['_1', '_2'])
adata_merge_DF = adata_merge_DF.drop_duplicates()
adata_merge_DF['Diff of LigXRec Diff (1-2)'] = adata_merge_DF['LigXRec Diff (PD1-Iso)_1'] - adata_merge_DF['LigXRec Diff (PD1-Iso)_2']
adata_merge_DF = adata_merge_DF.sort_values(by=['Diff of LigXRec Diff (1-2)'], ascending=False, ignore_index=True)
# Build numpy arrays of top/bottom differences
top_diff = adata_merge_DF[:num]
bottom_diff = adata_merge_DF[-num:]
topbottom_diff = pd.concat([top_diff, bottom_diff], ignore_index=True).drop_duplicates()
topbottom_diff.to_csv(os.path.join('LigXRec_outputs', 'top_bottom'+str(num)+'_ligxrec_diffs_'+adata_type1+'_'+adata_type2+'.csv'),
index=False)
np_df = np.zeros(shape=(len(topbottom_diff['Lig-Rec Gene Pairs'].unique()), len(topbottom_diff['Lig-Rec Cell Pairs'].unique())))
ligxrec_DF = pd.DataFrame(np_df, columns=topbottom_diff['Lig-Rec Cell Pairs'].unique(), index=topbottom_diff['Lig-Rec Gene Pairs'].unique())
for gene in topbottom_diff['Lig-Rec Gene Pairs'].unique():
for cell in topbottom_diff['Lig-Rec Cell Pairs'].unique():
## have to use .unique() after .iloc[] due to multiple rows with same value causing ValueError - need only Series size of 1
try:
ligxrec_DF.loc[gene,cell] = topbottom_diff[(topbottom_diff['Lig-Rec Gene Pairs']==gene) &
(topbottom_diff['Lig-Rec Cell Pairs']==cell)].iloc[:,-1].unique()
except ValueError:
ligxrec_DF.loc[gene,cell] = 0
ligxrec_DF = ligxrec_DF.drop_duplicates()
ligxrec_DF.to_csv(os.path.join('LigXRec_outputs', 'top_bottom'+str(num)+'_ligxrec_diffs_'+adata_type1+'_'+adata_type2+'_array.csv'))
ligxrec_NP = ligxrec_DF.to_numpy()
# csv tables of top/bottom diff. of LigXRec difference values
topbottom_csv = topbottom_diff[['Lig-Rec Gene Pairs', 'Lig-Rec Cell Pairs', 'LigXRec Diff (PD1-Iso)_1',
'LigXRec Diff (PD1-Iso)_2', 'Diff of LigXRec Diff (1-2)']]
topbottom_csv.to_csv(os.path.join('LigXRec_outputs', 'ligxrec_diffs_'+adata_type1+'_'+adata_type2+'.csv'), index=False)
# Matrixplot of differences of LigXRec differences of common gene-gene pairs found in both adataTypes #
f, ax = plt.subplots(figsize=(20,20))
ax = sns.heatmap(ligxrec_NP, cmap='bwr', center=0, cbar=True, linewidths=0.5, linecolor='gray', square=True, cbar_kws={'shrink': 0.5})
ax.set_ylabel('Common Lig.-Rec. Gene Pairs', fontstyle='oblique', fontsize=20)
ax.set_xlabel('Common Lig.-Rec. Clusters', fontstyle='oblique', fontsize=20)
ax.set_xticklabels(ligxrec_DF.columns)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
ax.set_yticklabels(ligxrec_DF.index, rotation=0)
ax.set_title('Top '+ str(num) + ' LigXRec Differences between '+ adata_type1 + ' and '+ adata_type2 +'', fontweight='bold', fontsize=25)
plt.savefig(os.path.join('figures', 'matrix_topBottom'+str(num)+'_ligxrec_diff_'+adata_type1+'_'+adata_type2+'.pdf'))
return ax
# FatT-FatC gene pair comparisons
common_and_unique_genes_DFs(fatT_merged_scores, fatC_merged_scores, 'FatT', 'FatC', 10)
1428 common gene pairs between FatT and FatC
70 gene pairs unique to FatT: {'Gphb5-Tshr', 'Adcyap1-Vipr2', 'Ccl28-Ccr3', 'Calr-Tshr', 'Tnc-Cntn1', 'Guca2b-Gucy2c', 'Slit1-Robo2', 'Gcg-Glp2r', 'Gpha2-Tshr', 'Btc-Erbb3', 'Pomc-Oprm1', 'Gdf5-Ror2', 'Pomc-Oprd1', 'Guca2b-Gucy2d', 'Nxph1-Nrxn1', 'Dsc2-Dsg2', 'Sct-Vipr2', 'Fgf13-Scn5a', 'Fgf9-Fgfr4', 'Ihh-Ptch2', 'Tnfsf11-Tnfrsf11b', 'Tac4-Tacr1', 'Fgf6-Fgfr4', 'Dkk2-Kremen2', 'Cthrc1-Fzd3', 'Ntn4-Unc5a', 'Tg-Lrp2', 'Adcyap1-Sctr', 'Sct-Vipr1', 'Amh-Amhr2', 'Nodal-Acvr1b', 'Lamc2-Itga3', 'Fgf6-Fgfr2', 'Rph3a-Nrxn1', 'Col2a1-Itga10', 'Nrg2-Erbb3', 'Slamf1-Slamf1', 'Adgrb1-Rtn4r', 'Inhbb-Acvr1b', 'Adcyap1-Vipr1', 'Nrg1-Erbb4', 'Ihh-Hhip', 'Egf-Erbb3', 'Inhbb-Acvr1', 'Fgf1-Fgfr2', 'Fgf9-Fgfr2', 'Fgf1-Fgfr4', 'Nlgn1-Nrxn1', 'F2-F2rl2', 'Arf1-Pld2', 'Nrg1-Erbb3', 'Omg-Rtn4r', 'Tg-Asgr1', 'Lta-Tnfrsf14', 'Agrn-Musk', 'Il17b-Il17rb', 'Nlgn1-Nrxn3', 'Rspo4-Lgr4', 'Sct-Sctr', 'Il23a-Il12rb1', 'Tac4-Tacr2', 'Robo2-Robo2', 'Ntng2-Lrrc4', 'Mst1-Mst1r', 'Nrg2-Erbb4', 'Calr-Itga3', 'Egf-Erbb2', 'Wnt1-Ror2', 'Cthrc1-Ror2', 'Crlf1-Cntfr'}
61 gene pairs unique to FatC: {'Tnfsf8-Tnfrsf8', 'Ntng1-Lrrc4c', 'Sema3a-Plxna1', 'Lrpap1-Vldlr', 'Lrrc4b-Ptprd', 'Npnt-Itga8', 'Efna5-Epha4', 'Fgf17-Fgfr3', 'Lama1-Itgb8', 'Uts2-Uts2r', 'Vegfc-Vipr2', 'Hcrt-Hcrtr2', 'Il17a-Il17rc', 'Lhb-Lhcgr', 'Cntn1-Ptprz1', 'Fgf23-Fgfr3', 'Rspo3-Lgr4', 'Fgg-Itga5', 'Dkk1-Kremen1', 'Cntn2-Cntnap2', 'Lrpap1-Lrp2', 'Inhba-Acvr1', 'Fgf8-Fgfr3', 'Rspo2-Lgr6', 'Efna5-Ephb1', 'Efnb3-Epha4', 'Ccl25-Ackr4', 'Wnt7a-Fzd9', 'Angptl3-Itga5', 'Rspo3-Lgr6', 'Efna5-Epha1', 'Wnt3-Fzd7', 'Pcsk9-Vldlr', 'Fgf16-Fgfr3', 'Efna4-Epha4', 'Rspo3-Lgr5', 'Cxcl3-Cxcr1', 'Efna3-Epha4', 'Fgb-Itga5', 'Wnt10b-Fzd7', 'Dkk1-Kremen2', 'Hbegf-Erbb2', 'Wnt1-Fzd9', 'Btc-Erbb4', 'Rspo2-Lgr5', 'Efna5-Epha5', 'Vcam1-Itgad', 'Comp-Itga5', 'Ccl25-Ccr10', 'Nrg4-Erbb4', 'Wnt6-Fzd7', 'Wnt5b-Fzd2', 'F8-Asgr2', 'Cxcl5-Cxcr1', 'Hbegf-Erbb4', 'Fgf5-Fgfr3', 'Efna5-Epha7', 'Bsg-Slc16a7', 'Cga-Lhcgr', 'Lrfn5-Lrfn5', 'Ifnl2-Ifnlr1'}
<AxesSubplot:title={'center':'Top 10 LigXRec Differences between FatT and FatC'}, xlabel='Common Lig.-Rec. Clusters', ylabel='Common Lig.-Rec. Gene Pairs'>
# FatT-OnlyT gene pair comparisons
common_and_unique_genes_DFs(fatT_merged_scores, onlyT_merged_scores, 'FatT', 'OnlyT', 10)
1400 common gene pairs between FatT and OnlyT
98 gene pairs unique to FatT: {'Mmp7-Erbb4', 'Ccl28-Ccr3', 'Nppc-Npr2', 'Mstn-Acvr2b', 'Gdf11-Acvr1b', 'Tnfsf13-Tnfrsf17', 'Dsc2-Dsg2', 'Cntn2-Cntn1', 'Bmp7-Acvr2a', 'Kng1-Gpr135', 'Dkk2-Kremen2', 'Bmp10-Acvr2a', 'Fgf6-Fgfr2', 'Wnt9b-Fzd8', 'Kng1-Gp1ba', 'Wnt3-Fzd8', 'Dll3-Notch2', 'F12-Gp1ba', 'Cort-Mrgprx2', 'Slitrk3-Ptprd', 'Wnt1-Fzd8', 'Wnt11-Fzd8', 'Adcyap1-Vipr2', 'Wnt7b-Fzd5', 'Nppc-Npr3', 'Gcg-Glp2r', 'Guca2b-Gucy2d', 'Wnt8a-Fzd8', 'Nxph1-Nrxn1', 'Ostn-Npr3', 'Fgf13-Scn5a', 'Tac4-Tacr1', 'Tg-Lrp2', 'Ihh-Ptch1', 'Amh-Amhr2', 'Gdf2-Acvr2a', 'Gdf11-Acvr2a', 'Lama1-Itga2', 'Col2a1-Itga2', 'Nodal-Acvr1c', 'Fgf9-Fgfr2', 'Rspo3-Fzd8', 'Wnt8a-Fzd5', 'Il5-Il5ra', 'Tg-Asgr1', 'Gdf5-Acvr2a', 'Ntng2-Lrrc4', 'Gdf2-Acvr2b', 'Nrg2-Erbb4', 'Wnt1-Ror2', 'Bmp15-Acvr2a', 'Il24-Il20ra', 'Gphb5-Tshr', 'Calca-Calcr', 'Gpha2-Tshr', 'Gip-Gipr', 'Bmp8a-Acvr2b', 'Bmp7-Bmpr1b', 'F2-Gp1ba', 'Adcyap1-Sctr', 'Rspo4-Lgr6', 'Cntn1-Notch2', 'Col2a1-Itga10', 'Il19-Il20ra', 'Dhh-Ptch1', 'Tshb-Tshr', 'Bmp7-Acvr2b', 'Nodal-Acvr2b', 'Wnt9b-Fzd5', 'Guca2b-Gucy2c', 'Prl-Prlr', 'Gdf5-Ror2', 'Rgmb-Neo1', 'C1qtnf1-Avpr2', 'Fgf9-Fgfr4', 'Rspo4-Lgr5', 'Fgf6-Fgfr4', 'Insl5-Rxfp3', 'Wnt5b-Fzd8', 'Nodal-Acvr1b', 'Bmp8a-Acvr2a', 'Nodal-Acvr2a', 'Adcyap1-Vipr1', 'Wnt7b-Fzd8', 'Artn-Ret', 'Lefty2-Acvr2b', 'Lgi3-Adam22', 'Tac4-Tacr3', 'Gdf11-Acvr2b', 'Nlgn1-Nrxn1', 'Wnt7a-Fzd5', 'Chad-Itga2', 'Gdf5-Acvr2b', 'Plau-Lrp2', 'Nlgn1-Nrxn3', 'Tac4-Tacr2', 'Pspn-Ret', 'Bmp6-Acvr2a'}
66 gene pairs unique to OnlyT: {'Btc-Erbb2', 'Tnfsf8-Tnfrsf8', 'Fgf8-Fgfrl1', 'Sema3a-Plxna1', 'Slitrk5-Ptprd', 'F2-F2rl1', 'Lrpap1-Vldlr', 'Ereg-Erbb4', 'Efna5-Epha4', 'Fgf17-Fgfr3', 'Vegfc-Vipr2', 'Arf1-Chrm3', 'Nlgn2-Nrxn1', 'Nlgn2-Nrxn3', 'Efnb3-Rhbdl2', 'Il17a-Il17rc', 'Calcb-Calcr', 'Lrfn3-Lrfn3', 'Gdf5-Acvr1', 'Fgf23-Fgfr3', 'Col9a3-Mag', 'Fgg-Itga5', 'Dkk1-Kremen1', 'Pomc-Mc1r', 'Tac1-Tacr2', 'Inhba-Acvr1', 'Fgf8-Fgfr3', 'Efna5-Ephb1', 'Lrpap1-Lrp2', 'Ccl25-Ackr4', 'Efna5-Epha1', 'Angptl3-Itga5', 'Wnt3-Fzd7', 'Efna4-Epha5', 'Fgf16-Fgfr3', 'Efna4-Epha4', 'Rspo3-Lgr5', 'Cxcl3-Cxcr1', 'Efnb3-Ephb6', 'Pcsk9-Vldlr', 'Fgb-Itga5', 'Bmp6-Acvr1', 'Wnt10b-Fzd7', 'Shank1-Sstr2', 'Lgi3-Stx1a', 'Hbegf-Erbb2', 'Efna5-Epha5', 'Vcam1-Itgad', 'Comp-Itga5', 'Wnt6-Fzd7', 'Wnt5b-Fzd2', 'Amh-Acvr1', 'F8-Asgr2', 'Eda-Eda2r', 'Ccl28-Ccr10', 'Cxcl5-Cxcr1', 'Adm2-Calcr', 'Fgf5-Fgfr3', 'Tgfa-Erbb2', 'Efna5-Epha7', 'Hbegf-Erbb4', 'Bsg-Slc16a7', 'Plg-F2rl1', 'Tnfsf13-Tnfrsf11b', 'Ccl25-Ccr10', 'Nts-Ntsr2'}
<AxesSubplot:title={'center':'Top 10 LigXRec Differences between FatT and OnlyT'}, xlabel='Common Lig.-Rec. Clusters', ylabel='Common Lig.-Rec. Gene Pairs'>
# FatC-OnlyT gene pair comparisons
common_and_unique_genes_DFs(fatC_merged_scores, onlyT_merged_scores, 'FatC', 'OnlyT', 10)
1398 common gene pairs between FatC and OnlyT
91 gene pairs unique to FatC: {'Mmp7-Erbb4', 'Nppc-Npr2', 'Mstn-Acvr2b', 'Gdf11-Acvr1b', 'Tnfsf13-Tnfrsf17', 'Cntn2-Cntn1', 'Uts2-Uts2r', 'Bmp7-Acvr2a', 'Kng1-Gpr135', 'Bmp10-Acvr2a', 'Rspo3-Lgr4', 'Wnt9b-Fzd8', 'Cntn2-Cntnap2', 'Kng1-Gp1ba', 'Wnt3-Fzd8', 'Dll3-Notch2', 'F12-Gp1ba', 'Rspo2-Lgr5', 'Cort-Mrgprx2', 'Slitrk3-Ptprd', 'Lrfn5-Lrfn5', 'Wnt1-Fzd8', 'Wnt11-Fzd8', 'Ntng1-Lrrc4c', 'Wnt7b-Fzd5', 'Nppc-Npr3', 'Wnt8a-Fzd8', 'Ostn-Npr3', 'Lama1-Itgb8', 'Hcrt-Hcrtr2', 'Ihh-Ptch1', 'Cntn1-Ptprz1', 'Gdf2-Acvr2a', 'Gdf11-Acvr2a', 'Lama1-Itga2', 'Rspo2-Lgr6', 'Col2a1-Itga2', 'Nodal-Acvr1c', 'Rspo3-Fzd8', 'Wnt8a-Fzd5', 'Wnt1-Fzd9', 'Il5-Il5ra', 'Gdf5-Acvr2a', 'Gdf2-Acvr2b', 'Cga-Lhcgr', 'Bmp15-Acvr2a', 'Il24-Il20ra', 'Calca-Calcr', 'Lrrc4b-Ptprd', 'Gip-Gipr', 'Bmp8a-Acvr2b', 'Bmp7-Bmpr1b', 'F2-Gp1ba', 'Rspo4-Lgr6', 'Cntn1-Notch2', 'Il19-Il20ra', 'Dhh-Ptch1', 'Tshb-Tshr', 'Bmp7-Acvr2b', 'Dkk1-Kremen2', 'Nodal-Acvr2b', 'Wnt9b-Fzd5', 'Ifnl2-Ifnlr1', 'Prl-Prlr', 'Npnt-Itga8', 'Rgmb-Neo1', 'C1qtnf1-Avpr2', 'Rspo4-Lgr5', 'Insl5-Rxfp3', 'Lhb-Lhcgr', 'Wnt5b-Fzd8', 'Efnb3-Epha4', 'Bmp8a-Acvr2a', 'Nodal-Acvr2a', 'Wnt7a-Fzd9', 'Rspo3-Lgr6', 'Wnt7b-Fzd8', 'Artn-Ret', 'Lefty2-Acvr2b', 'Efna3-Epha4', 'Lgi3-Adam22', 'Tac4-Tacr3', 'Gdf11-Acvr2b', 'Wnt7a-Fzd5', 'Chad-Itga2', 'Btc-Erbb4', 'Gdf5-Acvr2b', 'Plau-Lrp2', 'Nrg4-Erbb4', 'Pspn-Ret', 'Bmp6-Acvr2a'}
68 gene pairs unique to OnlyT: {'Btc-Erbb2', 'Calr-Tshr', 'Tnc-Cntn1', 'Fgf8-Fgfrl1', 'Slit1-Robo2', 'Slitrk5-Ptprd', 'F2-F2rl1', 'Pomc-Oprm1', 'Pomc-Oprd1', 'Btc-Erbb3', 'Ereg-Erbb4', 'Arf1-Chrm3', 'Sct-Vipr2', 'Nlgn2-Nrxn1', 'Nlgn2-Nrxn3', 'Ihh-Ptch2', 'Efnb3-Rhbdl2', 'Tnfsf11-Tnfrsf11b', 'Cthrc1-Fzd3', 'Ntn4-Unc5a', 'Calcb-Calcr', 'Sct-Vipr1', 'Lamc2-Itga3', 'Lrfn3-Lrfn3', 'Rph3a-Nrxn1', 'Gdf5-Acvr1', 'Col9a3-Mag', 'Pomc-Mc1r', 'Nrg2-Erbb3', 'Tac1-Tacr2', 'Slamf1-Slamf1', 'Adgrb1-Rtn4r', 'Inhbb-Acvr1b', 'Nrg1-Erbb4', 'Ihh-Hhip', 'Egf-Erbb3', 'Inhbb-Acvr1', 'Efna4-Epha5', 'Efnb3-Ephb6', 'Fgf1-Fgfr2', 'Fgf1-Fgfr4', 'Bmp6-Acvr1', 'Shank1-Sstr2', 'Lgi3-Stx1a', 'F2-F2rl2', 'Arf1-Pld2', 'Nrg1-Erbb3', 'Omg-Rtn4r', 'Lta-Tnfrsf14', 'Agrn-Musk', 'Il17b-Il17rb', 'Nts-Ntsr2', 'Rspo4-Lgr4', 'Amh-Acvr1', 'Sct-Sctr', 'Il23a-Il12rb1', 'Eda-Eda2r', 'Ccl28-Ccr10', 'Robo2-Robo2', 'Adm2-Calcr', 'Mst1-Mst1r', 'Tgfa-Erbb2', 'Calr-Itga3', 'Plg-F2rl1', 'Egf-Erbb2', 'Tnfsf13-Tnfrsf11b', 'Cthrc1-Ror2', 'Crlf1-Cntfr'}
<AxesSubplot:title={'center':'Top 10 LigXRec Differences between FatC and OnlyT'}, xlabel='Common Lig.-Rec. Clusters', ylabel='Common Lig.-Rec. Gene Pairs'>
# Compare gene pairs between all cell type sets
fatT_set = set(fatT_merged_scores['Lig-Rec Gene Pairs'])
fatC_set = set(fatC_merged_scores['Lig-Rec Gene Pairs'])
onlyT_set = set(onlyT_merged_scores['Lig-Rec Gene Pairs'])
fatT_unique_genes = fatT_set.difference(onlyT_set, fatC_set)
fatC_unique_genes = fatC_set.difference(onlyT_set, fatT_set)
onlyT_unique_genes = onlyT_set.difference(fatT_set, fatC_set)
print(str(len(onlyT_set.intersection(fatT_set, fatC_set))), 'gene pairs common between all cell types', '\n')
print(str(len(fatT_unique_genes)), 'gene pairs unique to FatT:', fatT_unique_genes, '\n')
print(str(len(fatC_unique_genes)), 'gene pairs unique to FatC:', fatC_unique_genes, '\n')
print(str(len(onlyT_unique_genes)), 'gene pairs unique to OnlyT:', onlyT_unique_genes, '\n')
1360 gene pairs common between all cell types
30 gene pairs unique to FatT: {'Ccl28-Ccr3', 'Dsc2-Dsg2', 'Dkk2-Kremen2', 'Fgf6-Fgfr2', 'Adcyap1-Vipr2', 'Gcg-Glp2r', 'Guca2b-Gucy2d', 'Nxph1-Nrxn1', 'Fgf13-Scn5a', 'Tac4-Tacr1', 'Tg-Lrp2', 'Amh-Amhr2', 'Fgf9-Fgfr2', 'Tg-Asgr1', 'Ntng2-Lrrc4', 'Nrg2-Erbb4', 'Wnt1-Ror2', 'Gphb5-Tshr', 'Gpha2-Tshr', 'Adcyap1-Sctr', 'Col2a1-Itga10', 'Guca2b-Gucy2c', 'Gdf5-Ror2', 'Fgf9-Fgfr4', 'Fgf6-Fgfr4', 'Nodal-Acvr1b', 'Adcyap1-Vipr1', 'Nlgn1-Nrxn1', 'Nlgn1-Nrxn3', 'Tac4-Tacr2'}
23 gene pairs unique to FatC: {'Uts2-Uts2r', 'Rspo3-Lgr4', 'Cntn2-Cntnap2', 'Rspo2-Lgr5', 'Lrfn5-Lrfn5', 'Ntng1-Lrrc4c', 'Lama1-Itgb8', 'Hcrt-Hcrtr2', 'Cntn1-Ptprz1', 'Rspo2-Lgr6', 'Wnt1-Fzd9', 'Cga-Lhcgr', 'Lrrc4b-Ptprd', 'Dkk1-Kremen2', 'Ifnl2-Ifnlr1', 'Npnt-Itga8', 'Lhb-Lhcgr', 'Efnb3-Epha4', 'Wnt7a-Fzd9', 'Rspo3-Lgr6', 'Efna3-Epha4', 'Btc-Erbb4', 'Nrg4-Erbb4'}
28 gene pairs unique to OnlyT: {'Btc-Erbb2', 'Fgf8-Fgfrl1', 'Slitrk5-Ptprd', 'Ereg-Erbb4', 'Arf1-Chrm3', 'Nlgn2-Nrxn1', 'Nlgn2-Nrxn3', 'Efnb3-Rhbdl2', 'Calcb-Calcr', 'Lrfn3-Lrfn3', 'Gdf5-Acvr1', 'Col9a3-Mag', 'Pomc-Mc1r', 'Tac1-Tacr2', 'Efna4-Epha5', 'Efnb3-Ephb6', 'Bmp6-Acvr1', 'Shank1-Sstr2', 'Lgi3-Stx1a', 'Nts-Ntsr2', 'Amh-Acvr1', 'Eda-Eda2r', 'Ccl28-Ccr10', 'Adm2-Calcr', 'Tgfa-Erbb2', 'Plg-F2rl1', 'Tnfsf13-Tnfrsf11b', 'F2-F2rl1'}
adataFatT.write(os.path.join('adata_steps', 'expB_step11_fatT_gene_comparisons.h5ad'))
adataFatC.write(os.path.join('adata_steps', 'expB_step11_fatC_gene_comparisons.h5ad'))
adataOnlyT.write(os.path.join('adata_steps', 'expB_step11_onlyT_gene_comparisons.h5ad'))
# adataAllT.write('adata_raw_var_FT/expB_raw_var_FT_step10_allTumor_gene_comparisons.h5ad')
/opt/anaconda3/envs/Python38/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead if is_string_dtype(df[key]) and not is_categorical(df[key])
import plotly.graph_objects as go
import chart_studio.plotly as py
import plotly.express as px
ligxrec_fc_pd = pd.read_csv('LigXRec_outputs/ligxrec_clusters_averages_onlyT_log2fc.csv', index_col=0)
ligxrec_fc_np = ligxrec_fc_pd.to_numpy()
data=ligxrec_fc_np
#px.defaults.color_continuous_scale = px.colors.diverging.Picnic
fig = px.imshow(data,
labels=dict(x="Receptor Cluster", y="Ligand Cluster", color="Log2FC"),
x=ligxrec_fc_pd.columns,
y=ligxrec_fc_pd.index,
color_continuous_scale=px.colors.diverging.Picnic,
color_continuous_midpoint=0,
title='Average LigXRec Interactions, Tumor (Log2FC)')
fig.update_xaxes(tickangle=325)
fig.update_xaxes(tickfont_size=10)
fig.update_yaxes(tickfont_size=10)
fig.show()
ligxrec_diff_pd = pd.read_csv('LigXRec_outputs/top_bottom10_ligxrec_FatT_array.csv', index_col=0)
ligxrec_diff_np = ligxrec_diff_pd.to_numpy()
data = ligxrec_diff_np
fig = px.imshow(data,
labels=dict(x="Lig-Rec Clusters", y="Lig-Rec Genes", color="LigXRec Diff"),
x=ligxrec_diff_pd.columns,
y=ligxrec_diff_pd.index,
color_continuous_scale=px.colors.diverging.Picnic,
color_continuous_midpoint=0,
title='Top/Bottom 10 LigXRec Differences w/ aPD-1 (FatT)')
fig.update_xaxes(tickangle=315)
fig.update_xaxes(tickfont_size=10)
fig.show()